Infrastructure for new group kernels, and new C kernels

author Erik Lindahl <erik@kth.se>

Sun, 28 Oct 2012 18:45:52 +0000 (14:45 -0400)

committer Mark Abraham <mark.j.abraham@gmail.com>

Thu, 8 Nov 2012 07:42:10 +0000 (08:42 +0100)
author Erik Lindahl <erik@kth.se>
Sun, 28 Oct 2012 18:45:52 +0000 (14:45 -0400)
committer Mark Abraham <mark.j.abraham@gmail.com>
Thu, 8 Nov 2012 07:42:10 +0000 (08:42 +0100)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index c413e515e994c54309003f558a06a46b19176631..e3a9c41eec45d658bb9e98512ddd53cbd3143806 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -409,6 +409,9 @@ check_function_exists(sigaction         HAVE_SIGACTION)
  check_function_exists(sysconf           HAVE_SYSCONF)
  check_function_exists(sched_setaffinity HAVE_SCHED_SETAFFINITY)
  check_function_exists(sched_getaffinity HAVE_SCHED_GETAFFINITY)
+check_function_exists(rsqrt             HAVE_RSQRT)
+check_function_exists(rsqrtf            HAVE_RSQRTF)
+check_function_exists(sqrtf             HAVE_SQRTF)
  
  include(CheckLibraryExists)
  check_library_exists(m sqrt "" HAVE_LIBM)
@@ -670,7 +673,7 @@ include(gmxTestInline)
  gmx_test_inline(INLINE_KEYWORD)
  
  include(gmxTestRestrict)
-gmx_test_inline(RESTRICT_KEYWORD)
+gmx_test_restrict(RESTRICT_KEYWORD)
  
  include(gmxTestPipes)
  gmx_test_pipes(HAVE_PIPES)
diff --git a/cmake/gmxTestInline.cmake b/cmake/gmxTestInline.cmake

index f1108b6e10f3fdb7747b1fb278108f1a9d3296bd..e107ac46085b054be2bf28ae69860fe9bea98bb9 100644 (file)
--- a/cmake/gmxTestInline.cmake
+++ b/cmake/gmxTestInline.cmake
@@ -11,20 +11,20 @@ MACRO(GMX_TEST_INLINE VARIABLE)
  
          MESSAGE(STATUS "Checking for inline keyword")
  
-       FOREACH(KEYWORD "inline" "__inline__" "__inline")
+       FOREACH(KEYWORD "__inline__" "__inline" "inline")
              IF(NOT TEST_${VARIABLE})
                  TRY_COMPILE(TEST_${VARIABLE} "${CMAKE_BINARY_DIR}"    
                              "${CMAKE_SOURCE_DIR}/cmake/TestInline.c"
                              COMPILE_DEFINITIONS "-DTESTINLINEDEF=${KEYWORD}" )
-                SET(CHK_INLINE_KEYWORD ${KEYWORD})
-            ENDIF(NOT TEST_${VARIABLE})
+                SET(LAST_INLINE_KEYWORD ${KEYWORD})
+           ENDIF(NOT TEST_${VARIABLE})
          ENDFOREACH(KEYWORD)
-             
+
          IF(TEST_${VARIABLE})
-            SET(${VARIABLE} ${KEYWORD})
-            MESSAGE(STATUS "Checking for inline keyword - ${CHK_INLINE_KEYWORD}")
+            SET(${VARIABLE} ${LAST_INLINE_KEYWORD} CACHE INTERNAL "Inline keyword" FORCE)
+            MESSAGE(STATUS "Checking for inline keyword - ${LAST_INLINE_KEYWORD}")
          ELSE(TEST_${VARIABLE})
-           SET(${VARIABLE} " ")
+           SET(${VARIABLE} " " CACHE INTERNAL "Inline keyword" FORCE)
              MESSAGE(STATUS "Checking for inline keyword - not found")
          ENDIF(TEST_${VARIABLE})
  
diff --git a/cmake/gmxTestRestrict.cmake b/cmake/gmxTestRestrict.cmake

index d2bc2b5b3c12aad96785c857bc8ddf789a621930..f23819bb89b227baa82ee9deeaddd366f67fef39 100644 (file)
--- a/cmake/gmxTestRestrict.cmake
+++ b/cmake/gmxTestRestrict.cmake
@@ -17,15 +17,15 @@ MACRO(GMX_TEST_RESTRICT VARIABLE)
                  TRY_COMPILE(TEST_${VARIABLE} "${CMAKE_BINARY_DIR}"    
                              "${CMAKE_SOURCE_DIR}/cmake/TestRestrict.c"
                              COMPILE_DEFINITIONS "-DTESTRESTRICTDEF=${KEYWORD}" )
-                SET(CHK_RESTRICT_KEYWORD ${KEYWORD})
+                SET(LAST_RESTRICT_KEYWORD ${KEYWORD})
              ENDIF(NOT TEST_${VARIABLE})
          ENDFOREACH(KEYWORD)
-             
+
          IF(TEST_${VARIABLE})
-            SET(${VARIABLE} ${KEYWORD})
-            MESSAGE(STATUS "Checking for restrict keyword - ${CHK_RESTRICT_KEYWORD}")
+            SET(${VARIABLE} ${LAST_RESTRICT_KEYWORD} CACHE INTERNAL "Restrict keyword" FORCE)
+            MESSAGE(STATUS "Checking for restrict keyword - ${LAST_RESTRICT_KEYWORD}")
          ELSE(TEST_${VARIABLE})
-           SET(${VARIABLE} " ")
+           SET(${VARIABLE} " " CACHE INTERNAL "Restrict keyword" FORCE)
              MESSAGE(STATUS "Checking for restrict keyword - not found")
          ENDIF(TEST_${VARIABLE})
  
diff --git a/include/force.h b/include/force.h

index 758b748dbf6bd43a981f4c255408ceb1e701fd42..d9bb5294c25af069fa0294cbaf8b39497cb65a10 100644 (file)
--- a/include/force.h
+++ b/include/force.h
@@ -138,13 +138,18 @@ gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
  
  void init_interaction_const_tables(FILE *fp, 
                                     interaction_const_t *ic,
-                                   int verlet_kernel_type);
+                                   int cutoff_scheme,
+                                   int verlet_kernel_type,
+                                   real rtab);
  /* Initializes the tables in the interaction constant data structure.
+ * Setting verlet_kernel_type to -1 always initializes tables for
+ * use with group kernels.
   */
  
  void init_interaction_const(FILE *fp, 
                              interaction_const_t **interaction_const,
-                            const t_forcerec *fr);
+                            const t_forcerec *fr,
+                            real  rtab);
  /* Initializes the interaction constant data structure. Currently it 
   * uses forcerec as input. 
   */
@@ -242,9 +247,7 @@ void ns(FILE       *fplog,
                real       *dvdlambda,
                gmx_grppairener_t *grppener,
                gmx_bool       bFillGrid,
-              gmx_bool       bDoLongRange,
-              gmx_bool       bDoForces,
-              rvec       *f);
+           gmx_bool       bDoLongRangeNS);
  /* Call the neighborsearcher */
  
  extern void do_force_lowlevel(FILE         *fplog,  
@@ -259,7 +262,8 @@ extern void do_force_lowlevel(FILE         *fplog,
                               t_grpopts    *opts,
                               rvec         x[],
                               history_t    *hist,
-                             rvec         f[],    
+                             rvec         f_shortrange[],
+                  rvec         f_longrange[],
                               gmx_enerdata_t *enerd,
                               t_fcdata     *fcd,
                               gmx_mtop_t     *mtop,
diff --git a/include/names.h b/include/names.h

index a654b3838824288238ba78fec0d1d272417d3a8a..7b2e56aa2641f22fa2273c874b06d8a25f8c83b4 100644 (file)
--- a/include/names.h
+++ b/include/names.h
@@ -78,7 +78,6 @@ extern const char *elmceq_names[elmceqNR+1];
  extern const char *separate_dhdl_file_names[esepdhdlfileNR+1];
  extern const char *dhdl_derivatives_names[edhdlderivativesNR+1];
  extern const char *esol_names[esolNR+1];
-extern const char *enlist_names[enlistNR+1];
  extern const char *edispc_names[edispcNR+1];
  extern const char *ecm_names[ecmNR+1];
  extern const char *eann_names[eannNR+1];
@@ -98,6 +97,9 @@ extern const char *eMultentOpt_names[eMultentOptNR+1];
  extern const char *eAdresstype_names[eAdressNR+1];
  extern const char *eAdressICtype_names[eAdressICNR+1];
  extern const char *eAdressSITEtype_names[eAdressSITENR+1];
+extern const char *gmx_nblist_geometry_names[GMX_NBLIST_GEOMETRY_NR+1];
+extern const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1];
+extern const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1];
  
  #define        UNDEFINED               "UNDEFINED"
  #define ENUM_NAME(e,max,names) ((((e)<0)||((e)>=(max)))?UNDEFINED:(names)[e])
diff --git a/include/nonbonded.h b/include/nonbonded.h

index a00bbd99eb6cd2bcc7931fe7501df60f65836c14..23aa6421f74402242cd48e6e5ad81080047c976c 100644 (file)
--- a/include/nonbonded.h
+++ b/include/nonbonded.h
@@ -45,33 +45,48 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
+
+
+
+void
+gmx_nonbonded_setup(FILE *         fplog,
+                    t_forcerec *   fr,
+                    gmx_bool       bGenericKernelOnly);
+
+
+
+
+
+void
+gmx_nonbonded_set_kernel_pointers(FILE *       fplog,
+                                  t_nblist *   nl);
+
  
-void gmx_setup_kernels(FILE *fplog,t_forcerec *fr,gmx_bool bGenericKernelOnly);
-void gmx_setup_adress_kernels(FILE *fplog,gmx_bool bGenericKernelOnly);
  
-#define GMX_DONB_LR             (1<<0)
-#define GMX_DONB_FORCES         (1<<1)
-#define GMX_DONB_FOREIGNLAMBDA  (1<<2)
+#define GMX_NONBONDED_DO_LR             (1<<0)
+#define GMX_NONBONDED_DO_FORCE          (1<<1)
+#define GMX_NONBONDED_DO_FOREIGNLAMBDA  (1<<2)
+#define GMX_NONBONDED_DO_POTENTIAL      (1<<3)
+#define GMX_NONBONDED_DO_SR             (1<<4)
  
  void
  do_nonbonded(t_commrec *cr,t_forcerec *fr,
-             rvec x[],rvec f[],t_mdatoms *md,t_blocka *excl,
-             real egnb[],real egcoul[],real egb[],rvec box_size,
+             rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *md,t_blocka *excl,
+             gmx_grppairener_t *grppener,rvec box_size,
               t_nrnb *nrnb,real *lambda,real dvdlambda[],
               int nls,int eNL,int flags);
  
-/* Calculate VdW/charge pair interactions (usually 1-4 interactions).
+/* Calculate VdW/charge listed pair interactions (usually 1-4 interactions).
   * global_atom_index is only passed for printing error messages.
   */
  real
-do_listed_vdw_q(int ftype,int nbonds,
-               const t_iatom iatoms[],const t_iparams iparams[],
-               const rvec x[],rvec f[],rvec fshift[],
-               const t_pbc *pbc,const t_graph *g,
-               real *lambda,real *dvdlambda,
-               const t_mdatoms *md,
-               const t_forcerec *fr,gmx_grppairener_t *grppener,
-               int *global_atom_index);
+do_nonbonded_listed(int ftype,int nbonds,const t_iatom iatoms[],const t_iparams iparams[],
+                    const rvec x[],rvec f[],rvec fshift[],const t_pbc *pbc,const t_graph *g,
+                    real *lambda, real *dvdl,const t_mdatoms *md,const t_forcerec *fr,
+                    gmx_grppairener_t *grppener,int *global_atom_index);
  
  #ifdef __cplusplus
  }
diff --git a/include/ns.h b/include/ns.h

index bfe9b4c9af96bb7ed3d158b53874cf054161d596..d59af03482f66d3883adfa2a8f2f814ebbe54021 100644 (file)
--- a/include/ns.h
+++ b/include/ns.h
@@ -89,8 +89,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                              real *lambda,real *dvdlambda,
                              gmx_grppairener_t *grppener,
                              gmx_bool bFillGrid,
-                            gmx_bool bDoLongRange,
-                            gmx_bool bDoForces,rvec *f);
+                            gmx_bool bDoLongRangeNS);
   
  
  /* Debugging routines from wnblist.c */
diff --git a/include/string2.h b/include/string2.h

index bafb60b47191cf84beb19af01cb08ec7ff97ea2a..645316b319c835238e02ebf7ad56e985c27f8fab 100644 (file)
--- a/include/string2.h
+++ b/include/string2.h
@@ -76,6 +76,9 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+}
+#endif
  
  #define CONTINUE    '\\'
  #define COMMENTSIGN ';'
@@ -111,6 +114,21 @@ int gmx_strncasecmp(const char *str1, const char *str2, int n);
  
  char *gmx_strdup(const char *src);
  char *gmx_strndup(const char *src, int n);
+
+/* Magic hash initialization number from Dan J. Bernstein. */
+extern const unsigned int
+gmx_string_hash_init;
+
+/* Return a hash of the string according to Dan J. Bernsteins algorithm.
+ * This routine only uses characters for which isalnum(c) is true,
+ * and all characters are converted to upper case.
+ * On the first invocation for a new string, use the constant
+ * gmx_string_hash_init for the second argument. If you want to create a hash
+ * corresponding to several concatenated strings, provide the returned hash
+ * value as hash_init for the second string, etc.
+ */
+unsigned int
+gmx_string_hash_func(const char *s, unsigned int hash_init);
      
  /** Pattern matcing with wildcards. */
  int gmx_wcmatch(const char *pattern, const char *src);
@@ -138,7 +156,7 @@ char **split(char sep,char *str);
  /* Implementation of the well-known Perl function split */
  
  gmx_large_int_t str_to_large_int_t(const char *str, char **endptr);
-
+    
  #ifdef GMX_NATIVE_WINDOWS
  #define snprintf _snprintf
  #endif
diff --git a/include/tables.h b/include/tables.h

index ec942425a03f15885ddc991bf73dc07bf4de42a0..262da15e2c887cf4e9cf1c4b12b47cd2e6a255eb 100644 (file)
--- a/include/tables.h
+++ b/include/tables.h
@@ -40,14 +40,19 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+}
+#endif
  
-
-void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
-                                 int ntab,int tableformat,
-                                 real dr,real beta);
-/* Fill table tabf of size ntab with spacing dr with the ewald long-range
- * (mesh) force and with tableformatF and tabv!=NULL, fill tabv energy.
- * With tableformatFDV0 the size of the tabf array should be ntab*4, tabv=NULL.
+void table_spline3_fill_ewald_lr(real *table_F,
+                                 real *table_V,
+                                 real *table_FDV0,
+                                 int   ntab,
+                                 real  dx,
+                                 real  beta);
+/* Fill tables of ntab points with spacing dr with the ewald long-range
+ * (mesh) force.
+ * There are three separate tables with format FDV0, F, and V.
   * This function interpolates the Ewald mesh potential contribution
   * with coefficient beta using a quadratic spline.
   * The force can then be interpolated linearly.
diff --git a/include/types/enums.h b/include/types/enums.h

index 9bc5948fbcfece19e1e466698e5c268ec872ecd5..7b0fa51d5ce80b6368d4012ee131daaf9eca6daa 100644 (file)
--- a/include/types/enums.h
+++ b/include/types/enums.h
@@ -39,6 +39,9 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
  
  /* note: these enums should correspond to the names in gmxlib/names.c */
  
@@ -76,15 +79,16 @@ enum {
    erscNO, erscALL, erscCOM, erscNR
  };
  
-enum { 
+enum {
    ecutsGROUP, ecutsVERLET, ecutsNR
  };
  
  /* Coulomb / VdW interaction modifiers.
   * grompp replaces eintmodPOTSHIFT_VERLET by eintmodPOTSHIFT or eintmodNONE.
+ * Exactcutoff is only used by Reaction-field-zero, and is not user-selectable.
   */
-enum {
-    eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodNR
+enum eintmod {
+    eintmodPOTSHIFT_VERLET, eintmodPOTSHIFT, eintmodNONE, eintmodPOTSWITCH, eintmodEXACTCUTOFF, eintmodNR
  };
  
  /*
@@ -345,6 +349,86 @@ enum {
    eAdressSITEcom,eAdressSITEcog, eAdressSITEatom, eAdressSITEatomatom, eAdressSITENR
  };
  
+
+/* The interactions contained in a (possibly merged) table
+ * for computing electrostatic, VDW repulsion and/or VDW dispersion 
+ * contributions.
+ */
+enum gmx_table_interaction
+{
+    GMX_TABLE_INTERACTION_ELEC,
+    GMX_TABLE_INTERACTION_VDWREP_VDWDISP,
+    GMX_TABLE_INTERACTION_VDWEXPREP_VDWDISP,
+    GMX_TABLE_INTERACTION_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWEXPREP_VDWDISP,
+    GMX_TABLE_INTERACTION_ELEC_VDWDISP,
+    GMX_TABLE_INTERACTION_NR
+};
+
+/* Different formats for table data. Cubic spline tables are typically stored
+ * with the four Y,F,G,H intermediate values (check tables.c for format), which
+ * makes it easy to load with a single 4-way SIMD instruction too.
+ * Linear tables only need one value per table point, or two if both V and F
+ * are calculated. However, with SIMD instructions this makes the loads unaligned,
+ * and in that case we store the data as F, D=F(i+1)-F(i), V, and then a blank value,
+ * which again makes it possible to load as a single instruction.
+ */
+enum gmx_table_format
+{
+    GMX_TABLE_FORMAT_CUBICSPLINE_YFGH,
+    GMX_TABLE_FORMAT_LINEAR_VF,
+    GMX_TABLE_FORMAT_LINEAR_V,
+    GMX_TABLE_FORMAT_LINEAR_F,
+    GMX_TABLE_FORMAT_LINEAR_FDV0,
+    GMX_TABLE_FORMAT_NR
+};
+
+/* Neighborlist geometry type.
+ * Kernels will compute interactions between two particles, 
+ * 3-center water, 4-center water or coarse-grained beads.
+ */
+enum gmx_nblist_kernel_geometry
+{
+    GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER3_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER3_WATER3,
+    GMX_NBLIST_GEOMETRY_WATER4_PARTICLE,
+    GMX_NBLIST_GEOMETRY_WATER4_WATER4,
+    GMX_NBLIST_GEOMETRY_CG_CG,
+    GMX_NBLIST_GEOMETRY_NR
+};
+
+/* Types of electrostatics calculations available inside nonbonded kernels.
+ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
+ * many interactions for instance map to tabulated kernels.
+ */
+enum gmx_nbkernel_elec
+{
+    GMX_NBKERNEL_ELEC_NONE,
+    GMX_NBKERNEL_ELEC_COULOMB,
+    GMX_NBKERNEL_ELEC_REACTIONFIELD,
+    GMX_NBKERNEL_ELEC_CUBICSPLINETABLE,
+    GMX_NBKERNEL_ELEC_GENERALIZEDBORN,
+    GMX_NBKERNEL_ELEC_EWALD,
+    GMX_NBKERNEL_ELEC_NR
+};
+
+/* Types of vdw calculations available inside nonbonded kernels.
+ * Note that these do NOT necessarily correspond to the user selections in the MDP file;
+ * many interactions for instance map to tabulated kernels.
+ */
+enum gmx_nbkernel_vdw
+{
+    GMX_NBKERNEL_VDW_NONE,
+    GMX_NBKERNEL_VDW_LENNARDJONES,
+    GMX_NBKERNEL_VDW_BUCKINGHAM,
+    GMX_NBKERNEL_VDW_CUBICSPLINETABLE,
+    GMX_NBKERNEL_VDW_NR
+};
+
+
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/include/types/force_flags.h b/include/types/force_flags.h

index 759b31587a1fe604abfa12d338492512507e34a4..4b22e5b6eca520b7da4e864da6b08f05348dc8b4 100644 (file)
--- a/include/types/force_flags.h
+++ b/include/types/force_flags.h
@@ -50,8 +50,8 @@ extern "C" {
  #define GMX_FORCE_DYNAMICBOX   (1<<1)
  /* Do neighbor searching */
  #define GMX_FORCE_NS           (1<<2)
-/* Calculate long-range energies/forces */
-#define GMX_FORCE_DOLR         (1<<3)
+/* Update long-range neighborlists */
+#define GMX_FORCE_LRNS         (1<<3)
  /* Calculate bonded energies/forces */
  #define GMX_FORCE_BONDED       (1<<4)
  /* Store long-range forces in a separate array */
@@ -66,6 +66,9 @@ extern "C" {
  #define GMX_FORCE_ENERGY       (1<<9)
  /* Calculate dHdl */
  #define GMX_FORCE_DHDL         (1<<10)
+/* Calculate long-range energies/forces */
+#define GMX_FORCE_DO_LR        (1<<11)
+
  /* Normally one want all energy terms and forces */
  #define GMX_FORCE_ALLFORCES    (GMX_FORCE_BONDED | GMX_FORCE_NONBONDED | GMX_FORCE_FORCES)
  
diff --git a/include/types/forcerec.h b/include/types/forcerec.h

index c94e382a089478cd8e3e249f5b654322d03828f0..a5b8c3e15e831fdd88e9494ee7c66fb3cdc7050e 100644 (file)
--- a/include/types/forcerec.h
+++ b/include/types/forcerec.h
@@ -44,30 +44,48 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
  
  /* Abstract type for PME that is defined only in the routine that use them. */
  typedef struct gmx_pme *gmx_pme_t;
  
-typedef struct {
-  real r;         /* range of the table */
-  int  n;         /* n+1 is the number of points */
-  real scale;     /* distance between two points */
-  real scale_exp; /* distance for exponential Buckingham table */
-  real *tab;      /* the actual tables, per point there are  4 numbers for
-                  * Coulomb, dispersion and repulsion (in total 12 numbers)
-                  */
+
+
+/* Structure describing the data in a single table */
+typedef struct
+{
+    enum gmx_table_interaction  interaction; /* Types of interactions stored in this table */
+    enum gmx_table_format       format;      /* Interpolation type and data format */
+
+    real                        r;         /* range of the table */
+    int                         n;         /* n+1 is the number of table points */
+    real                        scale;     /* distance (nm) between two table points */
+    real                        scale_exp; /* distance for exponential part of VdW table, not always used */
+    real *                      data;      /* the actual table data */
+
+    /* Some information about the table layout. This can also be derived from the interpolation
+     * type and the table interactions, but it is convenient to have here for sanity checks, and it makes it
+     * much easier to access the tables in the nonbonded kernels when we can set the data from variables.
+     * It is always true that stride = formatsize*ninteractions
+     */
+    int                         formatsize;    /* Number of fp variables for each table point (1 for F, 2 for VF, 4 for YFGH, etc.) */
+    int                         ninteractions; /* Number of interactions in table, 1 for coul-only, 3 for coul+rep+disp. */
+    int                         stride;        /* Distance to next table point (number of fp variables per table point in total) */
  } t_forcetable;
  
-typedef struct {
-  t_forcetable tab;
-  /* We duplicate tables for cache optimization purposes */
-  real *coultab;      /* Coul only */
-  real *vdwtab;       /* Vdw only   */
-  /* The actual neighbor lists, short and long range, see enum above
-   * for definition of neighborlist indices.
-   */
-  t_nblist nlist_sr[eNL_NR];
-  t_nblist nlist_lr[eNL_NR];
+typedef struct
+{
+    t_forcetable   table_elec;
+    t_forcetable   table_vdw;
+    t_forcetable   table_elec_vdw;
+
+    /* The actual neighbor lists, short and long range, see enum above
+     * for definition of neighborlist indices.
+     */
+    t_nblist nlist_sr[eNL_NR];
+    t_nblist nlist_lr[eNL_NR];
  } t_nblists;
  
  /* macros for the cginfo data in forcerec */
@@ -173,6 +191,20 @@ typedef struct {
    gmx_hw_info_t *hwinfo;
    gmx_bool      use_cpu_acceleration;
  
+  /* Interaction for calculated in kernels. In many cases this is similar to
+   * the electrostatics settings in the inputrecord, but the difference is that
+   * these variables always specify the actual interaction in the kernel - if
+   * we are tabulating reaction-field the inputrec will say reaction-field, but
+   * the kernel interaction will say cubic-spline-table. To be safe we also
+   * have a kernel-specific setting for the modifiers - if the interaction is
+   * tabulated we already included the inputrec modification there, so the kernel
+   * modification setting will say 'none' in that case.
+   */
+  int nbkernel_elec_interaction;
+  int nbkernel_vdw_interaction;
+  int nbkernel_elec_modifier;
+  int nbkernel_vdw_modifier;
+
    /* Use special N*N kernels? */
    gmx_bool bAllvsAll;
    /* Private work data */
@@ -183,7 +215,7 @@ typedef struct {
     * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0).
     */
    real rlist,rlistlong;
-  
+
    /* Dielectric constant resp. multiplication factor for charges */
    real zsquare,temp;
    real epsilon_r,epsilon_rf,epsfac;  
@@ -198,6 +230,7 @@ typedef struct {
  
    /* Dispersion correction stuff */
    int  eDispCorr;
+
    /* The shift of the shift or user potentials */
    real enershiftsix;
    real enershifttwelve;
@@ -224,12 +257,12 @@ typedef struct {
    t_forcetable tab14; /* for 1-4 interactions only */
  
    /* PPPM & Shifting stuff */
-  gmx_bool coul_pot_shift;
+  int coulomb_modifier;
    real rcoulomb_switch,rcoulomb;
    real *phi;
  
    /* VdW stuff */
-  gmx_bool vdw_pot_shift;
+  int vdw_modifier;
    double reppow;
    real rvdw_switch,rvdw;
    real bham_b_max;
@@ -266,7 +299,7 @@ typedef struct {
    int  *gid2nblists;
    t_nblists *nblists;
  
-  int      cutoff_scheme; /* old- or Verlet-style cutoff */
+  int cutoff_scheme; /* group- or Verlet-style cutoff */
    gmx_bool bNonbonded;    /* true if nonbonded calculations are *not* turned off */
    nonbonded_verlet_t *nbv;
  
@@ -427,6 +460,10 @@ typedef struct {
    int  *excl_load;
  } t_forcerec;
  
+/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
+ * been scaled by 6.0 or 12.0 to save flops in the kernels. We have corrected this everywhere
+ * in the code, but beware if you are using these macros externally.
+ */
  #define C6(nbfp,ntp,ai,aj)     (nbfp)[2*((ntp)*(ai)+(aj))]
  #define C12(nbfp,ntp,ai,aj)    (nbfp)[2*((ntp)*(ai)+(aj))+1]
  #define BHAMC(nbfp,ntp,ai,aj)  (nbfp)[3*((ntp)*(ai)+(aj))]
diff --git a/include/types/idef.h b/include/types/idef.h

index 7d44e4ccb2a881627feb6a51ec03d20fb22ba382..635c4e7141ed1596d46857c02ccafb244adceb1f 100644 (file)
--- a/include/types/idef.h
+++ b/include/types/idef.h
@@ -318,7 +318,7 @@ typedef struct
  typedef struct {
    int  n;         /* n+1 is the number of points */
    real scale;     /* distance between two points */
-  real *tab;      /* the actual tables, per point there are  4 numbers */
+  real *data;     /* the actual table data, per point there are 4 numbers */
  } bondedtable_t;
  
  #ifdef __cplusplus
diff --git a/include/types/inputrec.h b/include/types/inputrec.h

index b62fc949759cc3b1a22a9ae5e5b471e5c3f5dc07..a3d9c6fd7fd7c327eb62a2810ec4ee3124c6bddb 100644 (file)
--- a/include/types/inputrec.h
+++ b/include/types/inputrec.h
@@ -270,7 +270,7 @@ typedef struct {
    int  simulation_part; /* Used in checkpointing to separate chunks */
    gmx_large_int_t init_step;   /* start at a stepcount >0 (used w. tpbconv)    */
    int  nstcalcenergy;  /* frequency of energy calc. and T/P coupl. upd.        */
-  int  cutoff_scheme;   /* cut-off scheme: group or verlet              */
+  int  cutoff_scheme;   /* group or verlet cutoffs     */
    int  ns_type;                /* which ns method should we use?               */
    int  nstlist;                /* number of steps before pairlist is generated */
    int  ndelta;         /* number of cells per rlong                    */
@@ -313,8 +313,9 @@ typedef struct {
    rvec posres_comB;     /* The B-state COM of the posres atoms          */
    int  andersen_seed;   /* Random seed for Andersen thermostat (obsolete) */
    real verletbuf_drift; /* Max. drift (kJ/mol/ps/atom) for list buffer  */
-  real rlist;          /* short range pairlist cut-off (nm)            */
-  real rlistlong;      /* long range pairlist cut-off (nm)             */
+  real rlist;              /* short range pairlist cut-off (nm)                */
+  real rlistlong;          /* long range pairlist cut-off (nm)         */
+  int  nstcalclr;       /* Frequency of evaluating direct space long-range interactions */
    real rtpi;            /* Radius for test particle insertion           */
    int  coulombtype;    /* Type of electrostatics treatment             */
    int  coulomb_modifier; /* Modify the Coulomb interaction              */
diff --git a/include/types/interaction_const.h b/include/types/interaction_const.h

index 842d25b6224f02b60764580817cb2fbbca02e7b2..52f343bbca5ed92f8dc2c99d4871c5e670d5f1e6 100644 (file)
--- a/include/types/interaction_const.h
+++ b/include/types/interaction_const.h
@@ -40,8 +40,6 @@
  extern "C" {
  #endif
  
-enum { tableformatNONE, tableformatF, tableformatFDV0 };
-
  typedef struct {
      /* VdW */
      real rvdw;
@@ -55,7 +53,8 @@ typedef struct {
  
      /* Cut-off */
      real rlist;
-
+    real rlistlong;
+    
      /* PME/Ewald */
      real ewaldcoeff;
      real sh_ewald;   /* For shifting the Ewald potential */
@@ -72,7 +71,6 @@ typedef struct {
      /* Force/energy interpolation tables, linear in force, quadratic in V */
      real tabq_scale;
      int  tabq_size;
-    int  tabq_format;
      /* Coulomb force table, size of array is tabq_size (when used) */
      real *tabq_coul_F;
      /* Coulomb energy table, size of array is tabq_size (when used) */
diff --git a/include/types/nblist.h b/include/types/nblist.h

index ae5aea1853383ac0d009469cb3e8842694d8ea22..1e189cd7ef9a3eb5736426ad192eb507694d3b12 100644 (file)
--- a/include/types/nblist.h
+++ b/include/types/nblist.h
@@ -39,14 +39,6 @@
  extern "C" {
  #endif
  
-/* Neighborlist type */
-enum {
-  enlistATOM_ATOM,
-  enlistSPC_ATOM,   enlistSPC_SPC,
-  enlistTIP4P_ATOM, enlistTIP4P_TIP4P,
-  enlistCG_CG,
-  enlistNR
-};
  
  typedef unsigned long t_excl;
  
@@ -62,26 +54,31 @@ typedef unsigned long t_excl;
  
  typedef struct 
  {
-  int             enlist;      /* The type of nblist, enum, see above    */
-  int             il_code;      /* Innerloop index from nrnb.h, used     */
-                                /* for flop accounting.                  */
-  int             icoul;        /* Coulomb loop type index for kernels   */
-  int             ivdw;         /* VdW loop type index for kernels       */
-  int             free_energy;  /* Free energy setting for this list     */
+    int             igeometry;    /* The type of list (atom, water, etc.)  */
+    int             ielec;        /* Coulomb loop type index for kernels   */
+    int             ielecmod;     /* Coulomb modifier (e.g. switch/shift)  */
+    int             ivdw;         /* VdW loop type index for kernels       */
+    int             ivdwmod;      /* VdW modifier (e.g. switch/shift)      */
+    int             free_energy;  /* Free energy setting for this list     */
  
-  int             nri,maxnri;   /* Current/max number of i particles    */
-  int             nrj,maxnrj;   /* Current/max number of j particles    */
-  int             maxlen;       /* maxnr of j atoms for a single i atom  */
-  int *           iinr;                /* The i-elements                        */
-  int *           iinr_end;     /* The end atom, only with enlistCG      */
-  int *           gid;          /* Index in energy arrays                */
-  int *           shift;        /* Shift vector index                    */
-  int *           jindex;       /* Index in jjnr                         */
-  int *           jjnr;                /* The j-atom list                       */
-  int *           jjnr_end;     /* The end atom, only with enltypeCG     */
-  t_excl *        excl;         /* Exclusions, only with enltypeCG       */
-  int             count;        /* counter to multithread the innerloops */
-  void *          mtx;          /* mutex to lock the counter             */
+    int             nri,maxnri;   /* Current/max number of i particles    */
+    int             nrj,maxnrj;   /* Current/max number of j particles    */
+    int             maxlen;       /* maxnr of j atoms for a single i atom  */
+    int *           iinr;         /* The i-elements                        */
+    int *           iinr_end;     /* The end atom, only with enlistCG      */
+    int *           gid;          /* Index in energy arrays                */
+    int *           shift;        /* Shift vector index                    */
+    int *           jindex;       /* Index in jjnr                         */
+    int *           jjnr;         /* The j-atom list                       */
+    int *           jjnr_end;     /* The end atom, only with enltypeCG     */
+    t_excl *        excl;         /* Exclusions, only with enltypeCG       */
+
+    /* We use separate pointers for kernels that compute both potential
+     * and force (vf suffix), only potential (v) or only force (f)
+     */
+    void *          kernelptr_vf;
+    void *          kernelptr_v;
+    void *          kernelptr_f;
  } t_nblist;
  
  
diff --git a/include/types/nrnb.h b/include/types/nrnb.h

index aca5408c27f661478a4341f52e67d5be0b034ce1..0be7f1cd71144412a4d7b8772ef31b6e4247a881 100644 (file)
--- a/include/types/nrnb.h
+++ b/include/types/nrnb.h
@@ -39,57 +39,46 @@
  #ifdef __cplusplus
  extern "C" {
  #endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
  
-/* The nonbonded kernels are documented in gmxlib/nonbonded_kernels, 
- * but here's a lazy version of the numbering. The first position
- * is the Coulomb interaction (0 for none), second is Van der Waals
- * (again, 0 means no interaction), and the third is the water optimization
- * (0 meaning no water optimization = standard atom-atom loop)
- *
- *                                     value
- * pos                 1                   2           3              4
- * 1st Coul        Normal,1/r       Reaction-field  Table            Generalized born
- * 2nd Vdw         Lennard-Jones    Buckingham      Table             n/a
- * 3rd Water. opt  SPC-other atom   SPC-SPC         TIP4p-other at.  TIP4p-TIP4p 
- */
  
  #define eNR_NBKERNEL_NONE -1
  
  enum 
  {
-    eNR_NBKERNEL010, eNR_NBKERNEL020, eNR_NBKERNEL030,
-    eNR_NBKERNEL100, eNR_NBKERNEL101, eNR_NBKERNEL102, eNR_NBKERNEL103, eNR_NBKERNEL104,
-    eNR_NBKERNEL110, eNR_NBKERNEL111, eNR_NBKERNEL112, eNR_NBKERNEL113, eNR_NBKERNEL114,
-    eNR_NBKERNEL120, eNR_NBKERNEL121, eNR_NBKERNEL122, eNR_NBKERNEL123, eNR_NBKERNEL124,
-    eNR_NBKERNEL130, eNR_NBKERNEL131, eNR_NBKERNEL132, eNR_NBKERNEL133, eNR_NBKERNEL134,
-    eNR_NBKERNEL200, eNR_NBKERNEL201, eNR_NBKERNEL202, eNR_NBKERNEL203, eNR_NBKERNEL204,
-    eNR_NBKERNEL210, eNR_NBKERNEL211, eNR_NBKERNEL212, eNR_NBKERNEL213, eNR_NBKERNEL214,
-    eNR_NBKERNEL220, eNR_NBKERNEL221, eNR_NBKERNEL222, eNR_NBKERNEL223, eNR_NBKERNEL224,
-    eNR_NBKERNEL230, eNR_NBKERNEL231, eNR_NBKERNEL232, eNR_NBKERNEL233, eNR_NBKERNEL234,
-    eNR_NBKERNEL300, eNR_NBKERNEL301, eNR_NBKERNEL302, eNR_NBKERNEL303, eNR_NBKERNEL304,
-    eNR_NBKERNEL310, eNR_NBKERNEL311, eNR_NBKERNEL312, eNR_NBKERNEL313, eNR_NBKERNEL314,
-    eNR_NBKERNEL320, eNR_NBKERNEL321, eNR_NBKERNEL322, eNR_NBKERNEL323, eNR_NBKERNEL324,
-    eNR_NBKERNEL330, eNR_NBKERNEL331, eNR_NBKERNEL332, eNR_NBKERNEL333, eNR_NBKERNEL334,
-    eNR_NBKERNEL400, eNR_NBKERNEL410, eNR_NBKERNEL430,
-    eNR_NBKERNEL010NF, eNR_NBKERNEL020NF, eNR_NBKERNEL030NF,
-    eNR_NBKERNEL100NF, eNR_NBKERNEL101NF, eNR_NBKERNEL102NF, eNR_NBKERNEL103NF, eNR_NBKERNEL104NF,
-    eNR_NBKERNEL110NF, eNR_NBKERNEL111NF, eNR_NBKERNEL112NF, eNR_NBKERNEL113NF, eNR_NBKERNEL114NF,
-    eNR_NBKERNEL120NF, eNR_NBKERNEL121NF, eNR_NBKERNEL122NF, eNR_NBKERNEL123NF, eNR_NBKERNEL124NF,
-    eNR_NBKERNEL130NF, eNR_NBKERNEL131NF, eNR_NBKERNEL132NF, eNR_NBKERNEL133NF, eNR_NBKERNEL134NF,
-    eNR_NBKERNEL200NF, eNR_NBKERNEL201NF, eNR_NBKERNEL202NF, eNR_NBKERNEL203NF, eNR_NBKERNEL204NF,
-    eNR_NBKERNEL210NF, eNR_NBKERNEL211NF, eNR_NBKERNEL212NF, eNR_NBKERNEL213NF, eNR_NBKERNEL214NF,
-    eNR_NBKERNEL220NF, eNR_NBKERNEL221NF, eNR_NBKERNEL222NF, eNR_NBKERNEL223NF, eNR_NBKERNEL224NF,
-    eNR_NBKERNEL230NF, eNR_NBKERNEL231NF, eNR_NBKERNEL232NF, eNR_NBKERNEL233NF, eNR_NBKERNEL234NF,
-    eNR_NBKERNEL300NF, eNR_NBKERNEL301NF, eNR_NBKERNEL302NF, eNR_NBKERNEL303NF, eNR_NBKERNEL304NF,
-    eNR_NBKERNEL310NF, eNR_NBKERNEL311NF, eNR_NBKERNEL312NF, eNR_NBKERNEL313NF, eNR_NBKERNEL314NF,
-    eNR_NBKERNEL320NF, eNR_NBKERNEL321NF, eNR_NBKERNEL322NF, eNR_NBKERNEL323NF, eNR_NBKERNEL324NF,
-    eNR_NBKERNEL330NF, eNR_NBKERNEL331NF, eNR_NBKERNEL332NF, eNR_NBKERNEL333NF, eNR_NBKERNEL334NF,
-    eNR_NBKERNEL400NF, eNR_NBKERNEL410NF, eNR_NBKERNEL430NF, 
-    eNR_NBKERNEL_NR,
-    eNR_NBKERNEL_FREE_ENERGY = eNR_NBKERNEL_NR,
+    eNR_NBKERNEL_VDW_VF,
+    eNR_NBKERNEL_VDW_F,
+    eNR_NBKERNEL_ELEC_VF,
+    eNR_NBKERNEL_ELEC_F,
+    eNR_NBKERNEL_ELEC_W3_VF,
+    eNR_NBKERNEL_ELEC_W3_F,
+    eNR_NBKERNEL_ELEC_W3W3_VF,
+    eNR_NBKERNEL_ELEC_W3W3_F,
+    eNR_NBKERNEL_ELEC_W4_VF,
+    eNR_NBKERNEL_ELEC_W4_F,
+    eNR_NBKERNEL_ELEC_W4W4_VF,
+    eNR_NBKERNEL_ELEC_W4W4_F,
+    eNR_NBKERNEL_ELEC_VDW_VF,
+    eNR_NBKERNEL_ELEC_VDW_F,
+    eNR_NBKERNEL_ELEC_VDW_W3_VF,
+    eNR_NBKERNEL_ELEC_VDW_W3_F,
+    eNR_NBKERNEL_ELEC_VDW_W3W3_VF,
+    eNR_NBKERNEL_ELEC_VDW_W3W3_F,
+    eNR_NBKERNEL_ELEC_VDW_W4_VF,
+    eNR_NBKERNEL_ELEC_VDW_W4_F,
+    eNR_NBKERNEL_ELEC_VDW_W4W4_VF,
+    eNR_NBKERNEL_ELEC_VDW_W4W4_F,
+
+    eNR_NBKERNEL_NR,  /* Total number of interaction-specific kernel entries */
+
+    eNR_NBKERNEL_GENERIC = eNR_NBKERNEL_NR, /* Reuse number; KERNEL_NR is not an entry itself */
+    eNR_NBKERNEL_FREE_ENERGY,               /* Add other generic kernels _before_ the free energy one */
+
      eNR_NBKERNEL_ALLVSALL,
      eNR_NBKERNEL_ALLVSALLGB,
-    eNR_NBKERNEL_OUTER,
+
      eNR_NBNXN_DIST2,
      eNR_NBNXN_LJ_RF,  eNR_NBNXN_LJ_RF_E,
      eNR_NBNXN_LJ_TAB, eNR_NBNXN_LJ_TAB_E,
@@ -122,14 +111,16 @@ enum
      eNR_VSITE2,               eNR_VSITE3,               eNR_VSITE3FD,
      eNR_VSITE3FAD,            eNR_VSITE3OUT,            eNR_VSITE4FD,
      eNR_VSITE4FDN,            eNR_VSITEN,               eNR_GB,
-       eNR_CMAP,
+    eNR_CMAP,
      eNRNB
  };
  
  
-typedef struct {
-  double n[eNRNB];
-} t_nrnb;
+typedef struct
+{
+    double n[eNRNB];
+}
+t_nrnb;
  
  
  typedef struct gmx_wallcycle *gmx_wallcycle_t;
diff --git a/include/types/simple.h b/include/types/simple.h

index c9be7a06c1e46018cfad1208551de544a4e0ab33..72868210f9b5be2f5376edbe7a02cb9081faaad8 100644 (file)
--- a/include/types/simple.h
+++ b/include/types/simple.h
@@ -217,6 +217,12 @@ typedef int gmx_large_int_t;
  #endif
      
      
+#ifndef gmx_inline
+/* config.h tests for inline definitions and should work on a much wider range
+ * of compilers, but does not work with installed headers. These compiler checks
+ * still enable a real inline keyword for the most common compilers.
+ */
+
  /* Try to define suitable inline keyword for gmx_inline.
   * Set it to empty if we cannot find one (and dont complain to the user)
   */
@@ -245,11 +251,18 @@ typedef int gmx_large_int_t;
  #endif
  
  #else
+/* C++ */
  #  define gmx_inline inline
  #endif
  
+#endif /* ifndef gmx_inline */
+
+
+/* Restrict keywords. Note that this has to be done for C++ too, unless
+ * it was set from the more general checks if we had config.h (gmx internal)
+ */
+#ifndef gmx_restrict
  
-/* Restrict keywords. Note that this has to be done for C++ too. */
  #ifdef __GNUC__
  /* GCC */
  #  define gmx_restrict   __restrict__
@@ -272,6 +285,7 @@ typedef int gmx_large_int_t;
  #  define gmx_restrict
  #endif
  
+#endif
  
  /* Standard sizes for char* string buffers */
  #define STRLEN 4096
diff --git a/include/vec.h b/include/vec.h

index cbf570674450b739b05ab614f31ff0365c6306f6..fc74939b727d912ad2153f2db1cdf15085089e43 100644 (file)
--- a/include/vec.h
+++ b/include/vec.h
@@ -210,15 +210,27 @@ static real gmx_invsqrt(real x)
  #define INVSQRT_DONE
  #endif /* powerpc_invsqrt */
  
-
  #ifndef INVSQRT_DONE
-#define gmx_invsqrt(x) (1.0f/sqrt(x))
+#    ifdef GMX_DOUBLE
+#        ifdef HAVE_RSQRT
+#            define gmx_invsqrt(x)     rsqrt(x)
+#        else
+#            define gmx_invsqrt(x)     (1.0/sqrt(x))
+#        endif
+#    else /* single */
+#        ifdef HAVE_RSQRTF
+#            define gmx_invsqrt(x)     rsqrtf(x)
+#        elif defined HAVE_RSQRT
+#            define gmx_invsqrt(x)     rsqrt(x)
+#        elif defined HAVE_SQRTF
+#            define gmx_invsqrt(x)     (1.0/sqrtf(x))
+#        else
+#            define gmx_invsqrt(x)     (1.0/sqrt(x))
+#        endif
+#    endif
  #endif
  
  
-
-
-
  static real sqr(real x)
  {
    return (x*x);
@@ -522,8 +534,10 @@ static gmx_inline real norm(const rvec a)
     * float/double case here instead to avoid gmx_sqrt() being accidentally used. */
  #ifdef GMX_DOUBLE
    return dnorm(a);
-#else
+#elif defined HAVE_SQRTF
    return sqrtf(iprod(a, a));
+#else
+  return sqrt(iprod(a, a));
  #endif
  }
  
diff --git a/share/html/online/mdp_opt.html b/share/html/online/mdp_opt.html

index c51626034ef3ceeef70b547f8d7090dec6c4711d..637cd9561dcb30687f9c5d3b458f38ba818866df 100644 (file)
--- a/share/html/online/mdp_opt.html
+++ b/share/html/online/mdp_opt.html
@@ -35,7 +35,7 @@ IF YOU'RE NOT SURE ABOUT WHAT YOU'RE DOING, DON'T DO IT!
  <li><a HREF="#xmdrun"><b>shell molecular dynamics</b></a>(emtol,niter,fcstep)
  <li><a HREF="#tpi"><b>test particle insertion</b></a>(rtpi)
  <li><A HREF="#out"><b>output control</b></A> (nstxout, nstvout, nstfout, nstlog, nstcalcenergy, nstenergy, nstxtcout, xtc-precision, xtc-grps, energygrps)
-<li><A HREF="#nl"><b>neighbor searching</b></A> (cutoff-scheme, nstlist, ns-type, pbc, periodic-molecules, verlet-buffer-drift, rlist, rlistlong)
+<li><A HREF="#nl"><b>neighbor searching</b></A> (cutoff-scheme, nstlist, nstcalclr, ns-type, pbc, periodic-molecules, verlet-buffer-drift, rlist, rlistlong)
  <li><A HREF="#el"><b>electrostatics</b></A> (coulombtype, coulomb-modifier, rcoulomb-switch, rcoulomb, epsilon-r, epsilon-rf)
  <li><A HREF="#vdw"><b>VdW</b></A> (vdwtype, vdw-modifier, rvdw-switch, rvdw, DispCorr)
  <li><A HREF="#table"><b>tables</b></A> (table-extension, energygrp-table)
@@ -442,6 +442,42 @@ while 99.99% of the particles are fine.
  </dd>
  </dl></dd>
  
+<dt><b>nstcalclr: (-1) [steps]</b></dt>
+<dd><dl compact>
+Controls the period between calculations of long-range forces. Useful
+only with <b>cutoff-scheme</b>=<b>group</b>.
+<dt><b>1</b></dt>
+<dd>Calculate the long-range forces every single step. This is useful
+to have separate neighbor lists with buffers for electrostatics and Van
+der Waals interactions, and in particular it makes it possible to have
+the Van der Waals cutoff longer than electrostatics (useful e.g. with
+PME). However, there is no point in having identical long-range
+cutoffs for both interaction forms and update them every step - then
+it will be slightly faster to put everything in the short-range
+list.</dd>
+<dt><b>&gt;1</b></dt>
+<dd>Calculate the long-range forces every <b>nstcalclr</b> steps and
+use a multiple-time-step integrator to combine forces. This can now be
+done more frequently than <b>nstlist</b> since the lists are stored,
+and it might be a good idea e.g. for Van der Waals interactions that
+vary slower than electrostatics.</dd>
+<dt><b>-1</b></dt>
+<dd>Calculate long-range forces on steps where neighbor searching is
+performed. While this is the default value, you might want to consider
+updating the long-range forces more frequently.</dd>
+Note that twin-range force evaluation might be enabled automatically
+by PP-PME load balancing. This is done in order to maintain the chosen
+Van der Waals interaction radius even if the load balancing is
+changing the electrostatics cutoff. If the <tt>.mdp</tt> file already
+specifies twin-range interactions (e.g. to evaluate Lennard-Jones
+interactions with a longer cutoff than the PME electrostatics every
+2-3 steps), the load balancing will have also a small effect on
+Lennard-Jones, since the short-range cutoff (inside which forces are
+evaluated every step) is changed.</dl>
+</dd>
+
+
+
  <dt><b>ns-type:</b></dt>
  <dd><dl compact>
  <dt><b>grid</b></dt>
diff --git a/src/config.h.cmakein b/src/config.h.cmakein

index ea0d65d47fc75623dafc2a0021d5ea397b61e95d..bd115207deeb5140a3c149874d9afc5da8f2c579 100644 (file)
--- a/src/config.h.cmakein
+++ b/src/config.h.cmakein
@@ -293,6 +293,15 @@
  /* Define to 1 if you have the sigaction() function. */
  #cmakedefine HAVE_SIGACTION
  
+/* Define to 1 if you have the rsqrt() function. */
+#cmakedefine HAVE_RSQRT
+
+/* Define to 1 if you have the rsqrtf() function. */
+#cmakedefine HAVE_RSQRTF
+
+/* Define to 1 if you have the sqrtf() function. */
+#cmakedefine HAVE_SQRTF
+
  /* Define to 1 if you have the <string.h> header file. */
  #cmakedefine HAVE_STRING_H
  
@@ -408,14 +417,23 @@
  #cmakedefine gid_t int
  
  /* Define to __inline__ or __inline if that is what the C compiler
-   calls it, or to nothing if inline is not supported under any name.  */
+   calls it, or to nothing if inline is not supported under any name.
+   Please do NOT remove the gmx_inline keyword from here. The classical
+   C++ inline keyword is merely a recommendation to the compiler, and
+   many compilers support stronger alternatives (e.g. __forceinline)
+   that we might want to use. */
+#define gmx_inline ${INLINE_KEYWORD}
  #ifndef __cplusplus
  #define inline ${INLINE_KEYWORD}
  #endif
  
  /* Define to __restrict__ or __restrict if that is what the C compiler
-   calls it, or to nothing if restrict is not supported under any name.  */
-#define restrict ${RESTRICT_KEYWORD}
+   calls it, unless we are on C99 when it is simply called restrict.
+   Since restrict is a reserved key word in C99 we are not allowed to
+   redefine the word itself, so call this gmx_restrict to avoid having
+   to identify the language standard level. If it is not supported, it
+   is still defined to an empty string here. */
+#define gmx_restrict ${RESTRICT_KEYWORD}
  
  #ifndef CPLUSPLUS
  #ifdef __cplusplus
diff --git a/src/gmxlib/bondfree.c b/src/gmxlib/bondfree.c

index 2d49c9738c3d56eb371bb66a343959ae40761c6c..5058b899c42d1f5b273d7c9e81db5790eff7fbcb 100644 (file)
--- a/src/gmxlib/bondfree.c
+++ b/src/gmxlib/bondfree.c
@@ -3023,7 +3023,7 @@ static real bonded_tab(const char *type,int table_nr,
    k = (1.0 - lambda)*kA + lambda*kB;
  
    tabscale = table->scale;
-  VFtab    = table->tab;
+  VFtab    = table->data;
    
    rt    = r*tabscale;
    n0    = rt;
@@ -3579,11 +3579,11 @@ static real calc_one_bond(FILE *fplog,int thread,
          }
          else
          {
-            v = do_listed_vdw_q(ftype,nbn,iatoms+nb0,
-                                idef->iparams,
-                                (const rvec*)x,f,fshift,
-                                pbc,g,lambda,dvdl,
-                                md,fr,grpp,global_atom_index);
+            v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
+                                    pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
+
+            enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
+            enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
              
              if (bPrintSepPot)
              {
@@ -3666,11 +3666,11 @@ static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
                  }
                  else
                  {
-                    v = do_listed_vdw_q(ftype,nbonds,iatoms,
-                                        idef->iparams,
-                                        (const rvec*)x,f,fr->fshift,
-                                        pbc,g,lambda,dvdl,
-                                        md,fr,&enerd->grpp,global_atom_index);
+                    v = do_nonbonded_listed(ftype,nbonds,iatoms,
+                                            idef->iparams,
+                                            (const rvec*)x,f,fr->fshift,
+                                            pbc,g,lambda,dvdl,
+                                            md,fr,&enerd->grpp,global_atom_index);
                  }
                  if (ind != -1)
                  {
diff --git a/src/gmxlib/names.c b/src/gmxlib/names.c

index 0d428e8f470564c1986806e720f83c76db0e15d9..356785630b2a155d3b60fdceaeebe5911d12acef 100644 (file)
--- a/src/gmxlib/names.c
+++ b/src/gmxlib/names.c
@@ -95,7 +95,7 @@ const char *econstr_names[econtNR+1] = {
  };
  
  const char *eintmod_names[eintmodNR+1] = { 
-  "Potential-shift-Verlet","Potential-shift","None", NULL
+  "Potential-shift-Verlet","Potential-shift","None","Potential-switch","Exact-cutoff", NULL
  };
  
  const char *egrp_nm[egNR+1] = { 
@@ -176,10 +176,6 @@ const char *esol_names[esolNR+1] = {
    "No", "SPC", "TIP4p", NULL
  };
  
-const char *enlist_names[enlistNR+1] = {
-  "Atom-Atom", "SPC-Atom", "SPC-SPC", "TIP4p-Atom", "TIP4p-TIP4p", "CG-CG", NULL
-};
-
  const char *edispc_names[edispcNR+1] = {
    "No", "EnerPres", "Ener", "AllEnerPres", "AllEner", NULL
  };
@@ -257,3 +253,19 @@ const char *eAdressSITEtype_names[eAdressSITENR+1] = {
    "com","cog", "atom", "atomperatom", NULL
  };
  
+const char *gmx_nblist_geometry_names[GMX_NBLIST_GEOMETRY_NR+1] = {
+    "Particle-Particle", "Water3-Particle", "Water3-Water3", "Water4-Particle", "Water4-Water4", "CG-CG", NULL
+};
+
+const char *gmx_nbkernel_elec_names[GMX_NBKERNEL_ELEC_NR+1] =
+{
+    "None", "Coulomb", "Reaction-Field", "Cubic-Spline-Table", "Generalized-Born", "Ewald", NULL
+};
+
+const char *gmx_nbkernel_vdw_names[GMX_NBKERNEL_VDW_NR+1] =
+{
+    "None", "Lennard-Jones", "Buckingham", "Cubic-Spline-Table", NULL
+};
+
+
+
diff --git a/src/gmxlib/nonbonded/CMakeLists.txt b/src/gmxlib/nonbonded/CMakeLists.txt

index 2c3a658eb39e12851d1fcaa20f3861293327049c..74bb4f80e36f22d4cbb315c359a824569bafb2ed 100644 (file)
--- a/src/gmxlib/nonbonded/CMakeLists.txt
+++ b/src/gmxlib/nonbonded/CMakeLists.txt
@@ -1,24 +1,5 @@
  # Sources that should always be built
-file(GLOB NONBONDED_SOURCES *.c nb_kernel_c/*.c nb_kernel_adress_c/*.c)
-
-# Conditionally included ones
-if(GMX_FORTRAN)
-  if (GMX_DOUBLE)
-    file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_f77_double/*.[cf])
-  else(GMX_DOUBLE)
-    file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_f77_single/*.[cf])
-  endif(GMX_DOUBLE)
-endif(GMX_FORTRAN)
-
-if(GMX_POWER6)
-  file(GLOB NB_KERNEL_FORTRAN_SOURCES nb_kernel_power6/*.[cF])
-endif(GMX_POWER6)
-
-if(GMX_BLUEGENE)
-  file(GLOB NB_KERNEL_BLUEGENE_SOURCES nb_kernel_bluegene/*.c)
-endif(GMX_BLUEGENE)
-
-list(APPEND NONBONDED_SOURCES ${NB_KERNEL_FORTRAN_SOURCES} ${NB_KERNEL_BLUEGENE_SOURCES})
+file(GLOB NONBONDED_SOURCES *.c nb_kernel_c/*.c)
  
  # These sources will be used in the parent directory's CMakeLists.txt
  set(NONBONDED_SOURCES ${NONBONDED_SOURCES} PARENT_SCOPE)
diff --git a/src/gmxlib/nonbonded/nb_free_energy.c b/src/gmxlib/nonbonded/nb_free_energy.c

index 941be19a201e653fb345fc8d63a4b1a1e647287e..9400c8a89927666e7d85b16cfa16d49adc2d4554 100644 (file)
--- a/src/gmxlib/nonbonded/nb_free_energy.c
+++ b/src/gmxlib/nonbonded/nb_free_energy.c
@@ -41,46 +41,18 @@
  
  #include "vec.h"
  #include "typedefs.h"
+#include "nonbonded.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
  
  void
-gmx_nb_free_energy_kernel(int                  icoul,
-                          int                  ivdw,
-                          int                  nri,
-                          int *                iinr,
-                          int *                jindex,
-                          int *                jjnr,
-                          int *                shift,
-                          real *               shiftvec,
-                          real *               fshift,
-                          int *                gid,
-                          real *               x,
-                          real *               f,
-                          real *               chargeA,
-                          real *               chargeB,
-                          real                 facel,
-                          real                 krf,
-                          real                 crf,
-                          real                 ewc,
-                          real *               Vc,
-                          int *                typeA,
-                          int *                typeB,
-                          int                  ntype,
-                          real *               nbfp,
-                          real *               Vv,
-                          real                 tabscale,
-                          real *               VFtab,
-                          real                 lambda_coul,
-                          real                 lambda_vdw,
-                          real *               dvdl,
-                          real                 alpha_coul,
-                          real                 alpha_vdw,
-                          int                  lam_power,
-                          real                 sc_r_power,
-                          real                 sigma6_def,
-                          real                 sigma6_min,
-                          gmx_bool             bDoForces,
-                          int *                outeriter,
-                          int *                inneriter)
+gmx_nb_free_energy_kernel(t_nblist *                nlist,
+                          rvec *                    xx,
+                          rvec *                    ff,
+                          t_forcerec *              fr,
+                          t_mdatoms *               mdatoms,
+                          nb_kernel_data_t *        kernel_data,
+                          t_nrnb *                  nrnb)
  {
  
  #define  STATE_A  0
@@ -108,7 +80,112 @@ gmx_nb_free_energy_kernel(int                  icoul,
      int           n0,n1C,n1V,nnn;
      real          Y,F,G,H,Fp,Geps,Heps2,epsC,eps2C,epsV,eps2V,VV,FF;
      double        isp=0.564189583547756;
+    int           icoul,ivdw;
+    int           nri;
+    int *         iinr;
+    int *         jindex;
+    int *         jjnr;
+    int *         shift;
+    int *         gid;
+    int *         typeA;
+    int *         typeB;
+    int           ntype;
+    real *        shiftvec;
      real          dvdl_part;
+    real *        fshift;
+    real          tabscale;
+    real *        VFtab;
+    real *        x;
+    real *        f;
+    real          facel,krf,crf;
+    real *        chargeA;
+    real *        chargeB;
+    real          sigma6_min,sigma6_def,lam_power,sc_power,sc_r_power;
+    real          alpha_coul,alpha_vdw,lambda_coul,lambda_vdw,ewc;
+    real *        nbfp;
+    real *        dvdl;
+    real *        Vv;
+    real *        Vc;
+    gmx_bool      bDoForces;
+    real          rcoulomb,rvdw,factor_coul,factor_vdw,sh_invrc6;
+    gmx_bool      bExactElecCutoff,bExactVdwCutoff;
+    real          rcutoff,rcutoff2,rswitch,d,d2,swV3,swV4,swV5,swF2,swF3,swF4,sw,dsw,rinvcorr;
+
+    x                   = xx[0];
+    f                   = ff[0];
+
+    fshift              = fr->fshift[0];
+    Vc                  = kernel_data->energygrp_elec;
+    Vv                  = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+
+    nri                 = nlist->nri;
+    iinr                = nlist->iinr;
+    jindex              = nlist->jindex;
+    jjnr                = nlist->jjnr;
+    icoul               = nlist->ielec;
+    ivdw                = nlist->ivdw;
+    shift               = nlist->shift;
+    gid                 = nlist->gid;
+
+    shiftvec            = fr->shift_vec[0];
+    chargeA             = mdatoms->chargeA;
+    chargeB             = mdatoms->chargeB;
+    facel               = fr->epsfac;
+    krf                 = fr->k_rf;
+    crf                 = fr->c_rf;
+    ewc                 = fr->ewaldcoeff;
+    Vc                  = kernel_data->energygrp_elec;
+    typeA               = mdatoms->typeA;
+    typeB               = mdatoms->typeB;
+    ntype               = fr->ntype;
+    nbfp                = fr->nbfp;
+    Vv                  = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+    lambda_coul         = kernel_data->lambda[efptCOUL];
+    lambda_vdw          = kernel_data->lambda[efptVDW];
+    dvdl                = kernel_data->dvdl;
+    alpha_coul          = fr->sc_alphacoul;
+    alpha_vdw           = fr->sc_alphavdw;
+    lam_power           = fr->sc_power;
+    sc_r_power          = fr->sc_r_power;
+    sigma6_def          = fr->sc_sigma6_def;
+    sigma6_min          = fr->sc_sigma6_min;
+    bDoForces           = kernel_data->flags & GMX_NONBONDED_DO_FORCE;
+
+    rcoulomb            = fr->rcoulomb;
+    rvdw                = fr->rvdw;
+    sh_invrc6           = fr->ic->sh_invrc6;
+
+    if(fr->coulomb_modifier==eintmodPOTSWITCH || fr->vdw_modifier==eintmodPOTSWITCH)
+    {
+        rcutoff         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb : fr->rvdw;
+        rcutoff2        = rcutoff*rcutoff;
+        rswitch         = (fr->coulomb_modifier==eintmodPOTSWITCH) ? fr->rcoulomb_switch : fr->rvdw_switch;
+        d               = rcutoff-rswitch;
+        swV3            = -10.0/(d*d*d);
+        swV4            =  15.0/(d*d*d*d);
+        swV5            =  -6.0/(d*d*d*d*d);
+        swF2            = -30.0/(d*d*d);
+        swF3            =  60.0/(d*d*d*d);
+        swF4            = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Stupid compilers dont realize these variables will not be used */
+        rswitch         = 0.0;
+        swV3            = 0.0;
+        swV4            = 0.0;
+        swV5            = 0.0;
+        swF2            = 0.0;
+        swF3            = 0.0;
+        swF4            = 0.0;
+    }
+
+    bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
+    bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
  
      /* fix compiler warnings */
      nj1   = 0;
@@ -144,8 +221,8 @@ gmx_nb_free_energy_kernel(int                  icoul,
  
      /* Ewald (not PME) table is special (icoul==enbcoulFEWALD) */
  
-    do_coultab = (icoul==enbcoulTAB);
-    do_vdwtab  = (ivdw==enbcoulTAB);
+    do_coultab = (icoul==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE);
+    do_vdwtab  = (ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE);
      
      do_tab = do_coultab || do_vdwtab;
      
@@ -154,11 +231,11 @@ gmx_nb_free_energy_kernel(int                  icoul,
      
      for(n=0; (n<nri); n++)
      {
-        is3              = 3*shift[n];     
+        is3              = 3*shift[n];
          shX              = shiftvec[is3];  
          shY              = shiftvec[is3+1];
          shZ              = shiftvec[is3+2];
-        nj0              = jindex[n];      
+        nj0              = jindex[n];
          nj1              = jindex[n+1];    
          ii               = iinr[n];        
          ii3              = 3*ii;           
@@ -209,15 +286,16 @@ gmx_nb_free_energy_kernel(int                  icoul,
              qq[STATE_A]      = iqA*chargeA[jnr];
              qq[STATE_B]      = iqB*chargeB[jnr];
  
-            for (i=0;i<NSTATES;i++) 
+            for (i=0;i<NSTATES;i++)
              {
  
                  c6[i]              = nbfp[tj[i]];
                  c12[i]             = nbfp[tj[i]+1];
                  if((c6[i] > 0) && (c12[i] > 0))
                  {
-                    sigma6[i]       = c12[i]/c6[i];
-                    sigma2[i]       = pow(c12[i]/c6[i],1.0/3.0);
+                    /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+                    sigma6[i]       = 0.5*c12[i]/c6[i];
+                    sigma2[i]       = pow(sigma6[i],1.0/3.0);
                      /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
                         what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
                      if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
@@ -278,6 +356,9 @@ gmx_nb_free_energy_kernel(int                  icoul,
                      rinvV          = pow(rpinvV,1.0/sc_r_power);
                      rV             = 1.0/rinvV;
  
+                    factor_coul    = (rC<=rcoulomb) ? 1.0 : 0.0;
+                    factor_vdw     = (rV<=rvdw)     ? 1.0 : 0.0;
+
                      if (do_tab)
                      {
                          rtC        = rC*tabscale;
@@ -293,94 +374,158 @@ gmx_nb_free_energy_kernel(int                  icoul,
                          n1V        = tab_elemsize*n0;
                      }
  
-                    if(icoul==enbcoulOOR || icoul==enbcoulFEWALD)
+                    if(qq[i] != 0)
                      {
-                        /* simple cutoff */
-                        Vcoul[i]   = qq[i]*rinvC;
-                        FscalC[i]  = Vcoul[i]*rpinvC;
-                    }
-                    else if(icoul==enbcoulRF)
-                    {
-                        /* reaction-field */
-                        krsq       = krf*rC*rC;
-                        Vcoul[i]   = qq[i]*(rinvC+krsq-crf);
-                        FscalC[i]  = qq[i]*(rinvC-2.0*krsq)*rpinvC;
-                    }
-                    else if (icoul==enbcoulTAB)
-                    {
-                        /* non-Ewald tabulated coulomb */
-                        nnn        = n1C;
-                        Y          = VFtab[nnn];
-                        F          = VFtab[nnn+1];
-                        Geps       = epsC*VFtab[nnn+2];
-                        Heps2      = eps2C*VFtab[nnn+3];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsC*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vcoul[i]   = qq[i]*VV;
-                        FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
+                        switch(icoul)
+                        {
+                            case GMX_NBKERNEL_ELEC_COULOMB:
+                            case GMX_NBKERNEL_ELEC_EWALD:
+                                /* simple cutoff (yes, ewald is done all on direct space for free energy) */
+                                Vcoul[i]   = qq[i]*rinvC;
+                                FscalC[i]  = Vcoul[i]*rpinvC;
+                                break;
+                                
+                            case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+                                /* reaction-field */
+                                Vcoul[i]   = qq[i]*(rinvC+krf*rC*rC-crf);
+                                FscalC[i]  = qq[i]*(rinvC*rpinvC-2.0*krf);
+                                break;
+
+                            case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+                                /* non-Ewald tabulated coulomb */
+                                nnn        = n1C;
+                                Y          = VFtab[nnn];
+                                F          = VFtab[nnn+1];
+                                Geps       = epsC*VFtab[nnn+2];
+                                Heps2      = eps2C*VFtab[nnn+3];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsC*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vcoul[i]   = qq[i]*VV;
+                                FscalC[i]  = -qq[i]*tabscale*FF*rC*rpinvC;
+                                break;
+
+                            default:
+                                FscalC[i]  = 0.0;
+                                Vcoul[i]   = 0.0;
+                                break;
+                        }
+
+                        if(fr->coulomb_modifier==eintmodPOTSWITCH)
+                        {
+                            d                = rC-rswitch;
+                            d                = (d>0.0) ? d : 0.0;
+                            d2               = d*d;
+                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+                            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+                            Vcoul[i]        *= sw;
+                            FscalC[i]        = FscalC[i]*sw + Vcoul[i]*dsw;
+                        }
+
+                        if(bExactElecCutoff)
+                        {
+                            FscalC[i]        = (rC<rcoulomb) ? FscalC[i] : 0.0;
+                            Vcoul[i]         = (rC<rcoulomb) ? Vcoul[i] : 0.0;
+                        }
                      }
  
-                    if(ivdw==enbvdwLJ)
+                    if((c6[i] != 0) || (c12[i] != 0))
                      {
-                        /* cutoff LJ */
-                        if (sc_r_power == 6.0)
+                        switch(ivdw)
                          {
-                            rinv6            = rpinvV;
+                            case GMX_NBKERNEL_VDW_LENNARDJONES:
+                                /* cutoff LJ */
+                                if (sc_r_power == 6.0)
+                                {
+                                    rinv6            = rpinvV;
+                                }
+                                else
+                                {
+                                    rinv6            = pow(rinvV,6.0);
+                                }
+                                Vvdw6            = c6[i]*rinv6;
+                                Vvdw12           = c12[i]*rinv6*rinv6;
+                                if(fr->vdw_modifier==eintmodPOTSHIFT)
+                                {
+                                    Vvdw[i]          = ( (Vvdw12-c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
+                                                        -(Vvdw6-c6[i]*sh_invrc6)*(1.0/6.0));
+                                }
+                                else
+                                {
+                                    Vvdw[i]          = Vvdw12*(1.0/12.0)-Vvdw6*(1.0/6.0);
+                                }
+                                FscalV[i]        = (Vvdw12-Vvdw6)*rpinvV;
+                                break;
+
+                            case GMX_NBKERNEL_VDW_BUCKINGHAM:
+                                gmx_fatal(FARGS,"Buckingham free energy not supported.");
+                                break;
+
+                            case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+                                /* Table LJ */
+                                nnn = n1V+4;
+                                /* dispersion */
+                                Y          = VFtab[nnn];
+                                F          = VFtab[nnn+1];
+                                Geps       = epsV*VFtab[nnn+2];
+                                Heps2      = eps2V*VFtab[nnn+3];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsV*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vvdw[i]   += c6[i]*VV;
+                                FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
+
+                                /* repulsion */
+                                Y          = VFtab[nnn+4];
+                                F          = VFtab[nnn+5];
+                                Geps       = epsV*VFtab[nnn+6];
+                                Heps2      = eps2V*VFtab[nnn+7];
+                                Fp         = F+Geps+Heps2;
+                                VV         = Y+epsV*Fp;
+                                FF         = Fp+Geps+2.0*Heps2;
+                                Vvdw[i]   += c12[i]*VV;
+                                FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
+                                break;
+
+                            default:
+                                Vvdw[i]    = 0.0;
+                                FscalV[i]  = 0.0;
+                                break;
                          }
-                        else
+
+                        if(fr->vdw_modifier==eintmodPOTSWITCH)
                          {
-                            rinv6            = pow(rinvV,6.0);
+                            d                = rV-rswitch;
+                            d                = (d>0.0) ? d : 0.0;
+                            d2               = d*d;
+                            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+                            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+                            Vvdw[i]         *= sw;
+                            FscalV[i]        = FscalV[i]*sw + Vvdw[i]*dsw;
+
+                            FscalV[i]        = (rV<rvdw) ? FscalV[i] : 0.0;
+                            Vvdw[i]          = (rV<rvdw) ? Vvdw[i] : 0.0;
                          }
-                        Vvdw6            = c6[i]*rinv6;
-                        Vvdw12           = c12[i]*rinv6*rinv6;
-                        Vvdw[i]          = Vvdw12-Vvdw6;
-                        FscalV[i]        = (12.0*Vvdw12-6.0*Vvdw6)*rpinvV;
                      }
-                    else if(ivdw==enbvdwTAB)
-                    {
-                        /* Table LJ */
-                        nnn = n1V+4;
-
-                        /* dispersion */
-                        Y          = VFtab[nnn];
-                        F          = VFtab[nnn+1];
-                        Geps       = epsV*VFtab[nnn+2];
-                        Heps2      = eps2V*VFtab[nnn+3];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsV*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vvdw[i]   += c6[i]*VV;
-                        FscalV[i] -= c6[i]*tabscale*FF*rV*rpinvV;
-
-                        /* repulsion */
-                        Y          = VFtab[nnn+4];
-                        F          = VFtab[nnn+5];
-                        Geps       = epsV*VFtab[nnn+6];
-                        Heps2      = eps2V*VFtab[nnn+7];
-                        Fp         = F+Geps+Heps2;
-                        VV         = Y+epsV*Fp;
-                        FF         = Fp+Geps+2.0*Heps2;
-                        Vvdw[i]   += c12[i]*VV;
-                        FscalV[i] -= c12[i]*tabscale*FF*rV*rpinvV;
-                    }           
-                    /* Buckingham vdw free energy not supported for now */
                  }
              }
  
              Fscal = 0;
  
-            if (icoul==enbcoulFEWALD) {
+            if (icoul==GMX_NBKERNEL_ELEC_EWALD)
+            {
                  /* because we compute the softcore normally,
-                   we have to remove the ewald short range portion. Done outside of
-                   the states loop because this part doesn't depend on the scaled R */
+                 we have to remove the ewald short range portion. Done outside of
+                 the states loop because this part doesn't depend on the scaled R */
  
-                if (r != 0) 
+                if (r != 0)
                  {
                      VV    = gmx_erf(ewc*r)*rinv;
                      FF    = rinv*rinv*(VV - 2.0*ewc*isp*exp(-ewc*ewc*rsq));
                  }
-                else 
+                else
                  {
                      VV    = ewc*2.0/sqrt(M_PI);
                      FF    = 0;
@@ -437,6 +582,193 @@ gmx_nb_free_energy_kernel(int                  icoul,
  
      dvdl[efptCOUL]     += dvdl_coul;
      dvdl[efptVDW]      += dvdl_vdw;
-    *outeriter       = nri;            
-    *inneriter       = nj1;            
+
+    /* Estimate flops, average for free energy stuff:
+     * 12  flops per outer iteration
+     * 150 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*150);
+}
+
+real
+nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,real alpha_vdw,
+                               real tabscale,real *vftab,
+                               real qqA, real c6A, real c12A, real qqB, real c6B, real c12B,
+                               real LFC[2], real LFV[2],real DLF[2],
+                               real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2], real dlfac_vdw[2],
+                               real sigma6_def, real sigma6_min,real sigma2_def, real sigma2_min,
+                               real *velectot, real *vvdwtot, real *dvdl)
+{
+    real       r,rp,rpm2,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VV,FF,fscal;
+    real       qq[2],c6[2],c12[2],sigma6[2],sigma2[2],sigma_pow[2],sigma_powm2[2];
+    real       alpha_coul_eff,alpha_vdw_eff,dvdl_coul,dvdl_vdw;
+    real       rpinv,r_coul,r_vdw,velecsum,vvdwsum;
+    real       fscal_vdw[2],fscal_elec[2];
+    real       velec[2],vvdw[2];
+    int        i,ntab;
+
+    qq[0]    = qqA;
+    qq[1]    = qqB;
+    c6[0]    = c6A;
+    c6[1]    = c6B;
+    c12[0]   = c12A;
+    c12[1]   = c12B;
+
+    if (sc_r_power == 6.0)
+    {
+        rpm2             = r2*r2; /* r4 */
+        rp               = rpm2*r2; /* r6 */
+    }
+    else if (sc_r_power == 48.0)
+    {
+        rp               = r2*r2*r2;  /* r6 */
+        rp               = rp*rp; /* r12 */
+        rp               = rp*rp; /* r24 */
+        rp               = rp*rp; /* r48 */
+        rpm2             = rp/r2; /* r46 */
+    }
+    else
+    {
+        rp             = pow(r2,0.5*sc_r_power);  /* not currently supported as input, but can handle it */
+        rpm2           = rp/r2;
+    }
+
+    /* Loop over state A(0) and B(1) */
+    for (i=0;i<2;i++)
+    {
+        if((c6[i] > 0) && (c12[i] > 0))
+        {
+            sigma6[i]       = c12[i]/c6[i];
+            sigma2[i]       = pow(c12[i]/c6[i],1.0/3.0);
+            /* should be able to get rid of this ^^^ internal pow call eventually.  Will require agreement on
+             what data to store externally.  Can't be fixed without larger scale changes, so not 4.6 */
+            if (sigma6[i] < sigma6_min) { /* for disappearing coul and vdw with soft core at the same time */
+                sigma6[i] = sigma6_min;
+                sigma2[i] = sigma2_min;
+            }
+        }
+        else
+        {
+            sigma6[i]       = sigma6_def;
+            sigma2[i]       = sigma2_def;
+        }
+        if (sc_r_power == 6.0)
+        {
+            sigma_pow[i]    = sigma6[i];
+            sigma_powm2[i]  = sigma6[i]/sigma2[i];
+        }
+        else if (sc_r_power == 48.0)
+        {
+            sigma_pow[i]    = sigma6[i]*sigma6[i];   /* sigma^12 */
+            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
+            sigma_pow[i]    = sigma_pow[i]*sigma_pow[i]; /* sigma^48 */
+            sigma_powm2[i]  = sigma_pow[i]/sigma2[i];
+        }
+        else
+        {    /* not really supported as input, but in here for testing the general case*/
+            sigma_pow[i]    = pow(sigma2[i],sc_r_power/2);
+            sigma_powm2[i]  = sigma_pow[i]/(sigma2[i]);
+        }
+    }
+
+    /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+    if ((c12[0] > 0) && (c12[1] > 0)) {
+        alpha_vdw_eff    = 0;
+        alpha_coul_eff   = 0;
+    } else {
+        alpha_vdw_eff    = alpha_vdw;
+        alpha_coul_eff   = alpha_coul;
+    }
+
+    /* Loop over A and B states again */
+    for (i=0;i<2;i++)
+    {
+        fscal_elec[i] = 0;
+        fscal_vdw[i]  = 0;
+        velec[i]      = 0;
+        vvdw[i]       = 0;
+
+        /* Only spend time on A or B state if it is non-zero */
+        if( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
+        {
+            /* Coulomb */
+            rpinv            = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+            r_coul           = pow(rpinv,-1.0/sc_r_power);
+
+            /* Electrostatics table lookup data */
+            rtab             = r_coul*tabscale;
+            ntab             = rtab;
+            eps              = rtab-ntab;
+            eps2             = eps*eps;
+            ntab             = 12*ntab;
+            /* Electrostatics */
+            Y                = vftab[ntab];
+            F                = vftab[ntab+1];
+            Geps             = eps*vftab[ntab+2];
+            Heps2            = eps2*vftab[ntab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            velec[i]         = qq[i]*VV;
+            fscal_elec[i]    = -qq[i]*FF*r_coul*rpinv*tabscale;
+
+            /* Vdw */
+            rpinv            = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+            r_vdw            = pow(rpinv,-1.0/sc_r_power);
+            /* Vdw table lookup data */
+            rtab             = r_vdw*tabscale;
+            ntab             = rtab;
+            eps              = rtab-ntab;
+            eps2             = eps*eps;
+            ntab             = 12*ntab;
+            /* Dispersion */
+            Y                = vftab[ntab+4];
+            F                = vftab[ntab+5];
+            Geps             = eps*vftab[ntab+6];
+            Heps2            = eps2*vftab[ntab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            vvdw[i]          = c6[i]*VV;
+            fscal_vdw[i]     = -c6[i]*FF;
+
+            /* Repulsion */
+            Y                = vftab[ntab+8];
+            F                = vftab[ntab+9];
+            Geps             = eps*vftab[ntab+10];
+            Heps2            = eps2*vftab[ntab+11];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+eps*Fp;
+            FF               = Fp+Geps+2.0*Heps2;
+            vvdw[i]         += c12[i]*VV;
+            fscal_vdw[i]    -= c12[i]*FF;
+            fscal_vdw[i]    *= r_vdw*rpinv*tabscale;
+        }
+    }
+    /* Now we have velec[i], vvdw[i], and fscal[i] for both states */
+    /* Assemble A and B states */
+    velecsum  = 0;
+    vvdwsum   = 0;
+    dvdl_coul = 0;
+    dvdl_vdw  = 0;
+    fscal     = 0;
+    for (i=0;i<2;i++)
+    {
+        velecsum      += LFC[i]*velec[i];
+        vvdwsum       += LFV[i]*vvdw[i];
+
+        fscal         += (LFC[i]*fscal_elec[i]+LFV[i]*fscal_vdw[i])*rpm2;
+
+        dvdl_coul     += velec[i]*DLF[i] + LFC[i]*alpha_coul_eff*dlfac_coul[i]*fscal_elec[i]*sigma_pow[i];
+        dvdl_vdw      += vvdw[i]*DLF[i] + LFV[i]*alpha_vdw_eff*dlfac_vdw[i]*fscal_vdw[i]*sigma_pow[i];
+    }
+
+    dvdl[efptCOUL]     += dvdl_coul;
+    dvdl[efptVDW]      += dvdl_vdw;
+
+    *velectot           = velecsum;
+    *vvdwtot            = vvdwsum;
+
+    return fscal;
  }
+
diff --git a/src/gmxlib/nonbonded/nb_free_energy.h b/src/gmxlib/nonbonded/nb_free_energy.h

index 53696012324e4c7334e7b3aa2a568d7b44667975..4e7c63805a70f3f3258529c9ac758fef3443ef5b 100644 (file)
--- a/src/gmxlib/nonbonded/nb_free_energy.h
+++ b/src/gmxlib/nonbonded/nb_free_energy.h
@@ -35,47 +35,27 @@
  #ifndef _nb_free_energy_h_
  #define _nb_free_energy_h_
  
+#include "nb_kernel.h"
  #include <typedefs.h>
  
  void
-gmx_nb_free_energy_kernel(int                  icoul,
-                          int                  ivdw,
-                          int                  nri,
-                          int *                iinr,
-                          int *                jindex,
-                          int *                jjnr,
-                          int *                shift,
-                          real *               shiftvec,
-                          real *               fshift,
-                          int *                gid,
-                          real *               x,
-                          real *               f,
-                          real *               chargeA,
-                          real *               chargeB,
-                          real                 facel,
-                          real                 krf,
-                          real                 crf,
-                          real                 ewc,
-                          real *               Vc,
-                          int *                typeA,
-                          int *                typeB,
-                          int                  ntype,
-                          real *               nbfp,
-                          real *               Vvdw,
-                          real                 tabscale,
-                          real *               VFtab,
-                          real                 lambda_coul,
-                          real                 lambda_vdw,
-                          real *               dvdl,
-                          real                 alpha_coul,
-                          real                 alpha_vdw,
-                          int                  lam_power,
-                          real                 sc_r_power,
-                          real                 sigma6_def,
-                          real                 sigma6_min,
-                          gmx_bool             bDoForces,
-                          int *                outeriter,
-                          int *                inneriter);
+gmx_nb_free_energy_kernel(t_nblist *                nlist,
+                          rvec *                    x,
+                          rvec *                    f,
+                          t_forcerec *              fr,
+                          t_mdatoms *               mdatoms,
+                          nb_kernel_data_t *        kernel_data,
+                          t_nrnb *                  nrnb);
+
+real
+nb_free_energy_evaluate_single(real r2,real sc_r_power,real alpha_coul,
+                               real alpha_vdw,real tabscale,real *vftab,
+                               real qqA, real c6A, real c12A, real qqB,
+                               real c6B, real c12B,real LFC[2], real LFV[2],real DLF[2],
+                               real lfac_coul[2], real lfac_vdw[2],real dlfac_coul[2],
+                               real dlfac_vdw[2],real sigma6_def, real sigma6_min,
+                               real sigma2_def, real sigma2_min,
+                               real *velectot, real *vvdwtot, real *dvdl);
  
  #endif
  
diff --git a/src/gmxlib/nonbonded/nb_generic.c b/src/gmxlib/nonbonded/nb_generic.c

index 02a0ee8ab1464e7621cc03a485529ac0b6a65243..b052049de291fdada83b9fe78659bb95e53257ec 100644 (file)
--- a/src/gmxlib/nonbonded/nb_generic.c
+++ b/src/gmxlib/nonbonded/nb_generic.c
@@ -42,69 +42,147 @@
  #include "vec.h"
  #include "typedefs.h"
  #include "nb_generic.h"
+#include "nrnb.h"
+
+#include "nonbonded.h"
+#include "nb_kernel.h"
+
  
  void
-gmx_nb_generic_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,  
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter)
+gmx_nb_generic_kernel(t_nblist *                nlist,
+                      rvec *                    xx,
+                      rvec *                    ff,
+                      t_forcerec *              fr,
+                      t_mdatoms *               mdatoms,
+                      nb_kernel_data_t *        kernel_data,
+                      t_nrnb *                  nrnb)
  {
-    int           nri,ntype,table_nelements,icoul,ivdw;
+    int           nri,ntype,table_nelements,ielec,ivdw;
      real          facel,gbtabscale;
      int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid,nnn,n0;
      real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
+    real          fscal,felec,fvdw,velec,vvdw,tx,ty,tz;
      real          rinvsq;
      real          iq;
-    real          qq,vcoul,krsq,vctot;
+    real          qq,vctot;
      int           nti,nvdwparam;
      int           tj;
-       real          rt,r,eps,eps2,Y,F,Geps,Heps2,VV,FF,Fp,fijD,fijR;
+    real          rt,r,eps,eps2,Y,F,Geps,Heps2,VV,FF,Fp,fijD,fijR;
      real          rinvsix;
-    real          Vvdwtot;
-    real          Vvdw_rep,Vvdw_disp;
+    real          vvdwtot;
+    real          vvdw_rep,vvdw_disp;
      real          ix,iy,iz,fix,fiy,fiz;
      real          jx,jy,jz;
      real          dx,dy,dz,rsq,rinv;
      real          c6,c12,cexp1,cexp2,br;
-       real *        charge;
-       real *        shiftvec;
-       real *        vdwparam;
-       int *         shift;
-       int *         type;
-       
-       icoul               = nlist->icoul;
-       ivdw                = nlist->ivdw;
-
-       /* avoid compiler warnings for cases that cannot happen */
-       nnn                 = 0;
-       vcoul               = 0.0;
-       eps                 = 0.0;
-       eps2                = 0.0;
-       
-       /* 3 VdW parameters for buckingham, otherwise 2 */
-       nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-       table_nelements     = (icoul==3) ? 4 : 0;
-       table_nelements    += (ivdw==3) ? 8 : 0;
-         
+    real *        charge;
+    real *        shiftvec;
+    real *        vdwparam;
+    int *         shift;
+    int *         type;
+    real *        fshift;
+    real *        velecgrp;
+    real *        vvdwgrp;
+    real          tabscale;
+    real *        VFtab;
+    real *        x;
+    real *        f;
+    int           ewitab;
+    real          ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real *        ewtab;
+    real          rcoulomb2,rvdw,rvdw2,sh_invrc6;
+    real          rcutoff,rcutoff2;
+    real          rswitch_elec,rswitch_vdw,d,d2,sw,dsw,rinvcorr;
+    real          elec_swV3,elec_swV4,elec_swV5,elec_swF2,elec_swF3,elec_swF4;
+    real          vdw_swV3,vdw_swV4,vdw_swV5,vdw_swF2,vdw_swF3,vdw_swF4;
+    gmx_bool      bExactElecCutoff,bExactVdwCutoff,bExactCutoff;
+
+    x                   = xx[0];
+    f                   = ff[0];
+    ielec               = nlist->ielec;
+    ivdw                = nlist->ivdw;
+
+    fshift              = fr->fshift[0];
+    velecgrp            = kernel_data->energygrp_elec;
+    vvdwgrp             = kernel_data->energygrp_vdw;
+    tabscale            = kernel_data->table_elec_vdw->scale;
+    VFtab               = kernel_data->table_elec_vdw->data;
+
+    sh_ewald            = fr->ic->sh_ewald;
+    ewtab               = fr->ic->tabq_coul_FDV0;
+    ewtabscale          = fr->ic->tabq_scale;
+    ewtabhalfspace      = 0.5/ewtabscale;
+
+    rcoulomb2           = fr->rcoulomb*fr->rcoulomb;
+    rvdw                = fr->rvdw;
+    rvdw2               = rvdw*rvdw;
+    sh_invrc6           = fr->ic->sh_invrc6;
+
+    if(fr->coulomb_modifier==eintmodPOTSWITCH)
+    {
+        d               = fr->rcoulomb-fr->rcoulomb_switch;
+        elec_swV3       = -10.0/(d*d*d);
+        elec_swV4       =  15.0/(d*d*d*d);
+        elec_swV5       =  -6.0/(d*d*d*d*d);
+        elec_swF2       = -30.0/(d*d*d);
+        elec_swF3       =  60.0/(d*d*d*d);
+        elec_swF4       = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+        elec_swV3 = elec_swV4 = elec_swV5 = elec_swF2 = elec_swF3 = elec_swF4 = 0.0;
+    }
+    if(fr->vdw_modifier==eintmodPOTSWITCH)
+    {
+        d               = fr->rvdw-fr->rvdw_switch;
+        vdw_swV3        = -10.0/(d*d*d);
+        vdw_swV4        =  15.0/(d*d*d*d);
+        vdw_swV5        =  -6.0/(d*d*d*d*d);
+        vdw_swF2        = -30.0/(d*d*d);
+        vdw_swF3        =  60.0/(d*d*d*d);
+        vdw_swF4        = -30.0/(d*d*d*d*d);
+    }
+    else
+    {
+        /* Avoid warnings from stupid compilers (looking at you, Clang!) */
+        vdw_swV3 = vdw_swV4 = vdw_swV5 = vdw_swF2 = vdw_swF3 = vdw_swF4 = 0.0;
+    }
+
+    bExactElecCutoff    = (fr->coulomb_modifier!=eintmodNONE) || fr->eeltype == eelRF_ZERO;
+    bExactVdwCutoff     = (fr->vdw_modifier!=eintmodNONE);
+    bExactCutoff        = bExactElecCutoff || bExactVdwCutoff;
+
+    if(bExactCutoff)
+    {
+        rcutoff  = ( fr->rcoulomb > fr->rvdw ) ? fr->rcoulomb : fr->rvdw;
+        rcutoff2 = rcutoff*rcutoff;
+    }
+    else
+    {
+        /* Fix warnings for stupid compilers */
+        rcutoff = rcutoff2 = 1e30;
+    }
+
+    /* avoid compiler warnings for cases that cannot happen */
+    nnn                 = 0;
+    eps                 = 0.0;
+    eps2                = 0.0;
+
+    /* 3 VdW parameters for buckingham, otherwise 2 */
+    nvdwparam           = (ivdw==GMX_NBKERNEL_VDW_BUCKINGHAM) ? 3 : 2;
+    table_nelements     = 12;
+
      charge              = mdatoms->chargeA;
-       type                = mdatoms->typeA;
-       facel               = fr->epsfac;
-       shiftvec            = fr->shift_vec[0];
-       vdwparam            = fr->nbfp;
-       ntype               = fr->ntype;
-       
-       for(n=0; (n<nlist->nri); n++)
+    type                = mdatoms->typeA;
+    facel               = fr->epsfac;
+    shiftvec            = fr->shift_vec[0];
+    vdwparam            = fr->nbfp;
+    ntype               = fr->ntype;
+
+    for(n=0; (n<nlist->nri); n++)
      {
-        is3              = 3*nlist->shift[n];     
+        is3              = 3*nlist->shift[n];
          shX              = shiftvec[is3];  
          shY              = shiftvec[is3+1];
          shZ              = shiftvec[is3+2];
@@ -118,9 +196,9 @@ gmx_nb_generic_kernel(t_nblist *           nlist,
          iq               = facel*charge[ii];
          nti              = nvdwparam*ntype*type[ii];
          vctot            = 0;              
-        Vvdwtot          = 0;              
-        fix              = 0;              
-        fiy              = 0;              
+        vvdwtot          = 0;
+        fix              = 0;
+        fiy              = 0;
          fiz              = 0;              
          
          for(k=nj0; (k<nj1); k++)
@@ -136,156 +214,223 @@ gmx_nb_generic_kernel(t_nblist *           nlist,
              rsq              = dx*dx+dy*dy+dz*dz;
              rinv             = gmx_invsqrt(rsq);
              rinvsq           = rinv*rinv;  
-                       fscal            = 0;
-                       
-                       if(icoul==3 || ivdw==3)
-                       {
-                               r                = rsq*rinv;
-                               rt               = r*tabscale;     
-                               n0               = rt;             
-                               eps              = rt-n0;          
-                               eps2             = eps*eps;        
-                               nnn              = table_nelements*n0;                                          
-                       }
-                       
-                       /* Coulomb interaction. icoul==0 means no interaction */
-                       if(icoul>0)
-                       {
-                               qq               = iq*charge[jnr]; 
-
-                               switch(icoul)
-                               {
-                                       case 1:
-                                               /* Vanilla cutoff coulomb */
-                                               vcoul            = qq*rinv;      
-                                               fscal            = vcoul*rinvsq; 
-                                               break;
-
-                                       case 2:
-                                               /* Reaction-field */
-                                               krsq             = fr->k_rf*rsq;      
-                                               vcoul            = qq*(rinv+krsq-fr->c_rf);
-                                               fscal            = qq*(rinv-2.0*krsq)*rinvsq;
-                                               break;
-
-                                       case 3:
-                                               /* Tabulated coulomb */
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               nnn             += 4;
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               vcoul            = qq*VV;          
-                                               fscal            = -qq*FF*tabscale*rinv;
-                                               break;
-                                       
-                                       case 4:
-                                               /* GB */
-                                               gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
-                                               break;
-                                       
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
-                                               break;
-                               }
-                               vctot            = vctot+vcoul;    
-                       } /* End of coulomb interactions */
-                       
-                       
-                       /* VdW interaction. ivdw==0 means no interaction */
-                       if(ivdw>0)
-                       {
-                               tj               = nti+nvdwparam*type[jnr];
-                               
-                               switch(ivdw)
-                               {
-                                       case 1:
-                                               /* Vanilla Lennard-Jones cutoff */
-                                               c6               = vdwparam[tj];   
-                                               c12              = vdwparam[tj+1]; 
-                                               
-                                               rinvsix          = rinvsq*rinvsq*rinvsq;
-                                               Vvdw_disp        = c6*rinvsix;     
-                                               Vvdw_rep         = c12*rinvsix*rinvsix;
-                                               fscal           += (12.0*Vvdw_rep-6.0*Vvdw_disp)*rinvsq;
-                                               Vvdwtot          = Vvdwtot+Vvdw_rep-Vvdw_disp;
-                                               break;
-                                               
-                                       case 2:
-                                               /* Buckingham */
-                                               c6               = vdwparam[tj];   
-                                               cexp1            = vdwparam[tj+1]; 
-                                               cexp2            = vdwparam[tj+2]; 
-                                               
-                                               rinvsix          = rinvsq*rinvsq*rinvsq;
-                                               Vvdw_disp        = c6*rinvsix;     
-                                               br               = cexp2*rsq*rinv;
-                                               Vvdw_rep         = cexp1*exp(-br); 
-                                               fscal           += (br*Vvdw_rep-6.0*Vvdw_disp)*rinvsq;
-                                               Vvdwtot          = Vvdwtot+Vvdw_rep-Vvdw_disp;
-                                               break;
-                                               
-                                       case 3:
-                                               /* Tabulated VdW */
-                                               c6               = vdwparam[tj];   
-                                               c12              = vdwparam[tj+1]; 
-
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               Vvdw_disp        = c6*VV;          
-                                               fijD             = c6*FF;          
-                                               nnn             += 4;          
-                                               Y                = VFtab[nnn];     
-                                               F                = VFtab[nnn+1];   
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               Fp               = F+Geps+Heps2;   
-                                               VV               = Y+eps*Fp;       
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               Vvdw_rep         = c12*VV;         
-                                               fijR             = c12*FF;         
-                                               fscal           += -(fijD+fijR)*tabscale*rinv;
-                                               Vvdwtot          = Vvdwtot + Vvdw_disp + Vvdw_rep;                                              
-                                               break;
-                                                                                               
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic VdW interaction for ivdw=%d.\n",ivdw);
-                                               break;
-                               }
-                       } /* end VdW interactions */
-                       
-                       
-            tx               = fscal*dx;     
-            ty               = fscal*dy;     
-            tz               = fscal*dz;     
-            fix              = fix + tx;      
-            fiy              = fiy + ty;      
-            fiz              = fiz + tz;      
+            felec            = 0;
+            fvdw             = 0;
+            velec            = 0;
+            vvdw             = 0;
+
+            if(bExactCutoff && rsq>rcutoff2)
+            {
+                continue;
+            }
+
+            if(ielec==GMX_NBKERNEL_ELEC_CUBICSPLINETABLE || ivdw==GMX_NBKERNEL_VDW_CUBICSPLINETABLE)
+            {
+                r                = rsq*rinv;
+                rt               = r*tabscale;
+                n0               = rt;
+                eps              = rt-n0;
+                eps2             = eps*eps;
+                nnn              = table_nelements*n0;
+            }
+
+            /* Coulomb interaction. ielec==0 means no interaction */
+            if(ielec!=GMX_NBKERNEL_ELEC_NONE)
+            {
+                qq               = iq*charge[jnr];
+
+                switch(ielec)
+                {
+                    case GMX_NBKERNEL_ELEC_NONE:
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_COULOMB:
+                        /* Vanilla cutoff coulomb */
+                        velec            = qq*rinv;
+                        felec            = velec*rinvsq;
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_REACTIONFIELD:
+                        /* Reaction-field */
+                        velec            = qq*(rinv+fr->k_rf*rsq-fr->c_rf);
+                        felec            = qq*(rinv*rinvsq-2.0*fr->k_rf);
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
+                        /* Tabulated coulomb */
+                        Y                = VFtab[nnn];
+                        F                = VFtab[nnn+1];
+                        Geps             = eps*VFtab[nnn+2];
+                        Heps2            = eps2*VFtab[nnn+3];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        velec            = qq*VV;
+                        felec            = -qq*FF*tabscale*rinv;
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_GENERALIZEDBORN:
+                        /* GB */
+                        gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
+                        break;
+
+                    case GMX_NBKERNEL_ELEC_EWALD:
+                        ewrt             = rsq*rinv*ewtabscale;
+                        ewitab           = ewrt;
+                        eweps            = ewrt-ewitab;
+                        ewitab           = 4*ewitab;
+                        felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+                        rinvcorr         = (fr->coulomb_modifier==eintmodPOTSHIFT) ? rinv-fr->ic->sh_ewald : rinv;
+                        velec            = qq*(rinvcorr-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+                        felec            = qq*rinv*(rinvsq-felec);
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
+                        break;
+                }
+                if(fr->coulomb_modifier==eintmodPOTSWITCH)
+                {
+                    d                = rsq*rinv-fr->rcoulomb_switch;
+                    d                = (d>0.0) ? d : 0.0;
+                    d2               = d*d;
+                    sw               = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
+                    dsw              = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
+                    velec           *= sw;
+                    felec            = felec*sw + velec*dsw;
+                }
+                if(bExactElecCutoff)
+                {
+                    felec            = (rsq<=rcoulomb2) ? felec : 0.0;
+                    velec            = (rsq<=rcoulomb2) ? velec : 0.0;
+                }
+                vctot           += velec;
+            } /* End of coulomb interactions */
+
+
+            /* VdW interaction. ivdw==0 means no interaction */
+            if(ivdw!=GMX_NBKERNEL_VDW_NONE)
+            {
+                tj               = nti+nvdwparam*type[jnr];
+
+                switch(ivdw)
+                {
+                    case GMX_NBKERNEL_VDW_NONE:
+                        break;
+
+                    case GMX_NBKERNEL_VDW_LENNARDJONES:
+                        /* Vanilla Lennard-Jones cutoff */
+                        c6               = vdwparam[tj];
+                        c12              = vdwparam[tj+1];
+                        rinvsix          = rinvsq*rinvsq*rinvsq;
+                        vvdw_disp        = c6*rinvsix;
+                        vvdw_rep         = c12*rinvsix*rinvsix;
+                        fvdw             = (vvdw_rep-vvdw_disp)*rinvsq;
+                        if(fr->vdw_modifier==eintmodPOTSHIFT)
+                        {
+                            vvdw             = (vvdw_rep-c12*sh_invrc6*sh_invrc6)*(1.0/12.0)-(vvdw_disp-c6*sh_invrc6)*(1.0/6.0);
+                        }
+                        else
+                        {
+                            vvdw             = vvdw_rep/12.0-vvdw_disp/6.0;
+                        }
+                        break;
+
+                    case GMX_NBKERNEL_VDW_BUCKINGHAM:
+                        /* Buckingham */
+                        c6               = vdwparam[tj];
+                        cexp1            = vdwparam[tj+1];
+                        cexp2            = vdwparam[tj+2];
+
+                        rinvsix          = rinvsq*rinvsq*rinvsq;
+                        vvdw_disp        = c6*rinvsix;
+                        br               = cexp2*rsq*rinv;
+                        vvdw_rep         = cexp1*exp(-br);
+                        fvdw             = (br*vvdw_rep-vvdw_disp)*rinvsq;
+                        if(fr->vdw_modifier==eintmodPOTSHIFT)
+                        {
+                            vvdw             = (vvdw_rep-cexp1*exp(-cexp2*rvdw))-(vvdw_disp-c6*sh_invrc6)/6.0;
+                        }
+                        else
+                        {
+                            vvdw             = vvdw_rep-vvdw_disp/6.0;
+                        }
+                        break;
+
+                    case GMX_NBKERNEL_VDW_CUBICSPLINETABLE:
+                        /* Tabulated VdW */
+                        c6               = vdwparam[tj];
+                        c12              = vdwparam[tj+1];
+                        Y                = VFtab[nnn+4];
+                        F                = VFtab[nnn+5];
+                        Geps             = eps*VFtab[nnn+6];
+                        Heps2            = eps2*VFtab[nnn+7];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vvdw_disp        = c6*VV;
+                        fijD             = c6*FF;
+                        Y                = VFtab[nnn+8];
+                        F                = VFtab[nnn+9];
+                        Geps             = eps*VFtab[nnn+10];
+                        Heps2            = eps2*VFtab[nnn+11];
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vvdw_rep         = c12*VV;
+                        fijR             = c12*FF;
+                        fvdw             = -(fijD+fijR)*tabscale*rinv;
+                        vvdw             = vvdw_disp + vvdw_rep;
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic VdW interaction for ivdw=%d.\n",ivdw);
+                        break;
+                }
+                if(fr->vdw_modifier==eintmodPOTSWITCH)
+                {
+                    d                = rsq*rinv-fr->rvdw_switch;
+                    d                = (d>0.0) ? d : 0.0;
+                    d2               = d*d;
+                    sw               = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
+                    dsw              = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
+                    vvdw            *= sw;
+                    fvdw             = fvdw*sw + vvdw*dsw;
+                }
+                if(bExactVdwCutoff)
+                {
+                    fvdw             = (rsq<=rvdw2) ? fvdw : 0.0;
+                    vvdw             = (rsq<=rvdw2) ? vvdw : 0.0;
+                }
+                vvdwtot         += vvdw;
+            } /* end VdW interactions */
+
+            fscal            = felec+fvdw;
+
+            tx               = fscal*dx;
+            ty               = fscal*dy;
+            tz               = fscal*dz;
+            fix              = fix + tx;
+            fiy              = fiy + ty;
+            fiz              = fiz + tz;
              f[j3+0]          = f[j3+0] - tx;
              f[j3+1]          = f[j3+1] - ty;
              f[j3+2]          = f[j3+2] - tz;
          }
-        
+
          f[ii3+0]         = f[ii3+0] + fix;
          f[ii3+1]         = f[ii3+1] + fiy;
          f[ii3+2]         = f[ii3+2] + fiz;
          fshift[is3]      = fshift[is3]+fix;
          fshift[is3+1]    = fshift[is3+1]+fiy;
          fshift[is3+2]    = fshift[is3+2]+fiz;
-        ggid             = nlist->gid[n];         
-        Vc[ggid]         = Vc[ggid] + vctot;
-        Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
+        ggid             = nlist->gid[n];
+        velecgrp[ggid]  += vctot;
+        vvdwgrp[ggid]   += vvdwtot;
      }
-    
-    *outeriter       = nlist->nri;            
-    *inneriter       = nlist->jindex[n];               
+    /* Estimate flops, average for generic kernel:
+     * 12 flops per outer iteration
+     * 50 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_GENERIC,nlist->nri*12 + nlist->jindex[n]*50);
  }
-
diff --git a/src/gmxlib/nonbonded/nb_generic.h b/src/gmxlib/nonbonded/nb_generic.h

index fc44051cbc670468e940035bbd4ef2e426b02f65..845ca5699767f74a0c8674f03dfdb4a2acfef3ad 100644 (file)
--- a/src/gmxlib/nonbonded/nb_generic.h
+++ b/src/gmxlib/nonbonded/nb_generic.h
@@ -35,22 +35,19 @@
  #ifndef _nb_generic_h_
  #define _nb_generic_h_
  
+#include "nb_kernel.h"
  #include "types/simple.h"
  #include "typedefs.h"
  
  void
-gmx_nb_generic_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,  
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter);
+gmx_nb_generic_kernel(t_nblist *                nlist,
+                      rvec *                    x,
+                      rvec *                    f,
+                      t_forcerec *              fr,
+                      t_mdatoms *               mdatoms,
+                      nb_kernel_data_t *        kernel_data,
+                      t_nrnb *                  nrnb);
+
  
  #endif
  
diff --git a/src/gmxlib/nonbonded/nb_generic_adress.c b/src/gmxlib/nonbonded/nb_generic_adress.c

index 052d3f2e756c31dd887830ec772e70687df5fcfa..364a568b1c700bdc049b457256eff49a72673b07 100644 (file)
--- a/src/gmxlib/nonbonded/nb_generic_adress.c
+++ b/src/gmxlib/nonbonded/nb_generic_adress.c
@@ -47,20 +47,20 @@
  #define ALMOST_ONE 1-(1e-30)
  void
  gmx_nb_generic_adress_kernel(t_nblist *           nlist,
-                                         t_forcerec *         fr,
-                                         t_mdatoms *          mdatoms,
-                                         real *               x,
-                                         real *               f,
-                                         real *               fshift,
-                                         real *               Vc,
-                                         real *               Vvdw,
-                                         real                 tabscale,
-                                         real *               VFtab,
-                                         int *                outeriter,
-                                         int *                inneriter,
-                                          gmx_bool                bCG)
+                             t_forcerec *         fr,
+                             t_mdatoms *          mdatoms,
+                             real *               x,
+                             real *               f,
+                             real *               fshift,
+                             real *               Vc,
+                             real *               Vvdw,
+                             real                 tabscale,
+                             real *               VFtab,
+                             int *                outeriter,
+                             int *                inneriter,
+                             gmx_bool                bCG)
  {
-    int           nri,ntype,table_nelements,icoul,ivdw;
+    int           nri,ntype,table_nelements,ielec,ivdw;
      real          facel,gbtabscale;
      int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid,nnn,n0;
      real          shX,shY,shZ;
@@ -96,7 +96,7 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
  
      force_cap = fr->adress_ex_forcecap;
  
-    icoul               = nlist->icoul;
+    ielec               = nlist->ielec;
      ivdw                = nlist->ivdw;
  
      /* avoid compiler warnings for cases that cannot happen */
@@ -107,7 +107,7 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
  
      /* 3 VdW parameters for buckingham, otherwise 2 */
      nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-    table_nelements     = (icoul==3) ? 4 : 0;
+    table_nelements     = (ielec==3) ? 4 : 0;
      table_nelements    += (ivdw==3) ? 8 : 0;
  
      charge              = mdatoms->chargeA;
@@ -205,72 +205,71 @@ gmx_nb_generic_adress_kernel(t_nblist *           nlist,
              rinvsq           = rinv*rinv;
  
  
-                       fscal            = 0;
+            fscal            = 0;
  
-                       if(icoul==3 || ivdw==3)
-                       {
-                               r                = rsq*rinv;
-                               rt               = r*tabscale;
-                               n0               = rt;
-                               eps              = rt-n0;
-                               eps2             = eps*eps;
-                               nnn              = table_nelements*n0;
-                       }
-
-                       /* Coulomb interaction. icoul==0 means no interaction */
-                       if(icoul>0)
-                       {
-                               qq               = iq*charge[jnr];
-
-                               switch(icoul)
-                               {
-                                       case 1:
-                                               /* Vanilla cutoff coulomb */
-                                               vcoul            = qq*rinv;
-                                               fscal            = vcoul*rinvsq;
-                                               break;
-
-                                       case 2:
-                                               /* Reaction-field */
-                                               krsq             = fr->k_rf*rsq;
-                                               vcoul            = qq*(rinv+krsq-fr->c_rf);
-                                               fscal            = qq*(rinv-2.0*krsq)*rinvsq;
-                                               break;
-
-                                       case 3:
-                                               /* Tabulated coulomb */
-                                               Y                = VFtab[nnn];
-                                               F                = VFtab[nnn+1];
-                                               Geps             = eps*VFtab[nnn+2];
-                                               Heps2            = eps2*VFtab[nnn+3];
-                                               nnn             += 4;
-                                               Fp               = F+Geps+Heps2;
-                                               VV               = Y+eps*Fp;
-                                               FF               = Fp+Geps+2.0*Heps2;
-                                               vcoul            = qq*VV;
-                                               fscal            = -qq*FF*tabscale*rinv;
-                                               break;
-
-                                       case 4:
-                                               /* GB */
-                                               gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
-                                               break;
+            if(ielec==3 || ivdw==3)
+            {
+                r                = rsq*rinv;
+                rt               = r*tabscale;
+                n0               = rt;
+                eps              = rt-n0;
+                eps2             = eps*eps;
+                nnn              = table_nelements*n0;
+            }
  
-                                       default:
-                                               gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
-                                               break;
-                               }
-                               vctot            = vctot+vcoul;
-                       } /* End of coulomb interactions */
+            /* Coulomb interaction. ielec==0 means no interaction */
+            if(ielec>0)
+            {
+                qq               = iq*charge[jnr];
  
+                switch(ielec)
+                {
+                    case 1:
+                        /* Vanilla cutoff coulomb */
+                        vcoul            = qq*rinv;
+                        fscal            = vcoul*rinvsq;
+                        break;
+
+                    case 2:
+                        /* Reaction-field */
+                        krsq             = fr->k_rf*rsq;
+                        vcoul            = qq*(rinv+krsq-fr->c_rf);
+                        fscal            = qq*(rinv-2.0*krsq)*rinvsq;
+                        break;
+
+                    case 3:
+                        /* Tabulated coulomb */
+                        Y                = VFtab[nnn];
+                        F                = VFtab[nnn+1];
+                        Geps             = eps*VFtab[nnn+2];
+                        Heps2            = eps2*VFtab[nnn+3];
+                        nnn             += 4;
+                        Fp               = F+Geps+Heps2;
+                        VV               = Y+eps*Fp;
+                        FF               = Fp+Geps+2.0*Heps2;
+                        vcoul            = qq*VV;
+                        fscal            = -qq*FF*tabscale*rinv;
+                        break;
+
+                    case 4:
+                        /* GB */
+                        gmx_fatal(FARGS,"Death & horror! GB generic interaction not implemented.\n");
+                        break;
+
+                    default:
+                        gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
+                        break;
+                }
+                vctot            = vctot+vcoul;
+            } /* End of coulomb interactions */
  
-                       /* VdW interaction. ivdw==0 means no interaction */
-                       if(ivdw>0)
-                       {
-                               tj               = nti+nvdwparam*type[jnr];
+            /* VdW interaction. ivdw==0 means no interaction */
+            if(ivdw>0)
+            {
+                tj               = nti+nvdwparam*type[jnr];
  
-                               switch(ivdw)
-                               {
+                switch(ivdw)
+                {
                                         case 1:
                                                 /* Vanilla Lennard-Jones cutoff */
                                                 c6               = vdwparam[tj];
diff --git a/src/gmxlib/nonbonded/nb_generic_cg.c b/src/gmxlib/nonbonded/nb_generic_cg.c

index 400866f898e391b931fce0557d756505c8e94546..5349f26e7dfdf159466bbf28244dc0c9834236cb 100644 (file)
--- a/src/gmxlib/nonbonded/nb_generic_cg.c
+++ b/src/gmxlib/nonbonded/nb_generic_cg.c
@@ -43,22 +43,20 @@
  #include "vec.h"
  #include "typedefs.h"
  #include "nb_generic_cg.h"
+#include "nonbonded.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
  
   void
- gmx_nb_generic_cg_kernel(t_nblist *           nlist,
-                          t_forcerec *         fr,
-                          t_mdatoms *          mdatoms,
-                          real *               x,
-                          real *               f,
-                          real *               fshift,
-                          real *               Vc,
-                          real *               Vvdw,
-                          real                 tabscale,  
-                          real *               VFtab,
-                          int *                outeriter,
-                          int *                inneriter)
+gmx_nb_generic_cg_kernel(t_nblist *                nlist,
+                         rvec *                    xx,
+                         rvec *                    ff,
+                         t_forcerec *              fr,
+                         t_mdatoms *               mdatoms,
+                         nb_kernel_data_t *        kernel_data,
+                         t_nrnb *                  nrnb)
  {
-     int           nri,ntype,table_nelements,icoul,ivdw;
+     int           nri,ntype,table_nelements,ielec,ivdw;
       real          facel,gbtabscale;
       int           n,is3,i3,k,nj0,nj1,j3,ggid,nnn,n0;
       int           ai0,ai1,ai,aj0,aj1,aj;
@@ -83,10 +81,25 @@
       int *         shift;
       int *         type;
       t_excl *      excl;
-       
-     icoul               = nlist->icoul;
+     real *        fshift;
+     real *        Vc;
+     real *        Vvdw;
+     real          tabscale;
+     real *        VFtab;
+     real *        x;
+     real *        f;
+
+     x                   = xx[0];
+     f                   = ff[0];
+     ielec               = nlist->ielec;
       ivdw                = nlist->ivdw;
-     
+
+     fshift              = fr->fshift[0];
+     Vc                  = kernel_data->energygrp_elec;
+     Vvdw                = kernel_data->energygrp_vdw;
+     tabscale            = kernel_data->table_elec_vdw->scale;
+     VFtab               = kernel_data->table_elec_vdw->data;
+
       /* avoid compiler warnings for cases that cannot happen */
       nnn                 = 0;
       vcoul               = 0.0;
@@ -95,7 +108,7 @@
       
       /* 3 VdW parameters for buckingham, otherwise 2 */
       nvdwparam           = (nlist->ivdw==2) ? 3 : 2;
-     table_nelements     = (icoul==3) ? 4 : 0;
+     table_nelements     = (ielec==3) ? 4 : 0;
       table_nelements    += (ivdw==3) ? 8 : 0;
       
       charge              = mdatoms->chargeA;
@@ -162,7 +175,7 @@
                       rinvsq           = rinv*rinv;  
                       fscal            = 0;
  
-                     if (icoul==3 || ivdw==3)
+                     if (ielec==3 || ivdw==3)
                       {
                           r                = rsq*rinv;
                           rt               = r*tabscale;     
@@ -172,12 +185,12 @@
                           nnn              = table_nelements*n0;                                        
                       }
                       
-                     /* Coulomb interaction. icoul==0 means no interaction */
-                     if (icoul > 0)
+                     /* Coulomb interaction. ielec==0 means no interaction */
+                     if (ielec > 0)
                       {
                           qq               = iq*charge[aj]; 
                           
-                         switch(icoul)
+                         switch(ielec)
                           {
                           case 1:
                               /* Vanilla cutoff coulomb */
@@ -212,7 +225,7 @@
                             break;
                             
                           default:
-                             gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for icoul=%d.\n",icoul);
+                             gmx_fatal(FARGS,"Death & horror! No generic coulomb interaction for ielec=%d.\n",ielec);
                               break;
                           }
                           vctot            = vctot+vcoul;    
@@ -310,8 +323,10 @@
           Vc[ggid]        += vctot;
           Vvdw[ggid]      += Vvdwtot;
       }
-     
-     *outeriter       = nlist->nri;            
-     *inneriter       = nlist->jindex[n];              
+    /* Estimate flops, average for generic cg kernel:
+     * 12  flops per outer iteration
+     * 100 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_FREE_ENERGY,nlist->nri*12 + nlist->jindex[n]*100);
  }
  
diff --git a/src/gmxlib/nonbonded/nb_generic_cg.h b/src/gmxlib/nonbonded/nb_generic_cg.h

index ac50315688439da5c66d535d9540575f32449693..5e760522d5974a4d51b9eae7b44c5fe65c01c4c6 100644 (file)
--- a/src/gmxlib/nonbonded/nb_generic_cg.h
+++ b/src/gmxlib/nonbonded/nb_generic_cg.h
@@ -38,20 +38,17 @@
  
  #include "types/simple.h"
  #include "typedefs.h"
+#include "nb_kernel.h"
+#include "nrnb.h"
  
  void
-gmx_nb_generic_cg_kernel(t_nblist *           nlist,
-                        t_forcerec *         fr,
-                        t_mdatoms *          mdatoms,
-                        real *               x,
-                        real *               f,
-                        real *               fshift,
-                        real *               Vc,
-                        real *               Vvdw,
-                        real                 tabscale,  
-                        real *               VFtab,
-                        int *                outeriter,
-                        int *                inneriter);
+gmx_nb_generic_cg_kernel(t_nblist *                nlist,
+                         rvec *                    x,
+                         rvec *                    f,
+                         t_forcerec *              fr,
+                         t_mdatoms *               mdatoms,
+                         nb_kernel_data_t *        kernel_data,
+                         t_nrnb *                  nrnb);
  
  #endif
  
diff --git a/src/gmxlib/nonbonded/nb_kernel.c b/src/gmxlib/nonbonded/nb_kernel.c

new file mode 100644 (file)

index 0000000..c210e21
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel.c
@@ -0,0 +1,177 @@
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This file is part of GROMACS.
+ * Copyright (c) 2012-
+ *
+ * Written by the Gromacs development team under coordination of
+ * David van der Spoel, Berk Hess, and Erik Lindahl.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <stdio.h>
+#include <string.h>
+
+#include "nb_kernel.h"
+#include "smalloc.h"
+#include "string2.h"
+
+
+/* Static data structures to find kernels */
+static nb_kernel_info_t *   kernel_list           = NULL;
+static unsigned int         kernel_list_size      = 0;
+static int *                kernel_list_hash      = NULL;
+static unsigned int         kernel_list_hash_size = 0;
+
+static unsigned int
+nb_kernel_hash_func(const char *   arch,
+                    const char *   elec,
+                    const char *   elec_mod,
+                    const char *   vdw,
+                    const char *   vdw_mod,
+                    const char *   geom,
+                    const char *   other,
+                    const char *   vf)
+{
+    unsigned int hash;
+
+    hash = gmx_string_hash_func(arch,gmx_string_hash_init);
+    hash = gmx_string_hash_func(elec,hash);
+    hash = gmx_string_hash_func(elec_mod,hash);
+    hash = gmx_string_hash_func(vdw,hash);
+    hash = gmx_string_hash_func(vdw_mod,hash);
+    hash = gmx_string_hash_func(geom,hash);
+    hash = gmx_string_hash_func(other,hash);
+    hash = gmx_string_hash_func(vf,hash);
+
+    return hash;
+}
+
+void
+nb_kernel_list_add_kernels(nb_kernel_info_t *   new_kernel_list,
+                           int                  new_kernel_list_size)
+{
+    srenew(kernel_list,kernel_list_size+new_kernel_list_size);
+    memcpy(kernel_list+kernel_list_size,new_kernel_list,new_kernel_list_size*sizeof(nb_kernel_info_t));
+    kernel_list_size += new_kernel_list_size;
+}
+
+
+int
+nb_kernel_list_hash_init(void)
+{
+    unsigned int            i;
+    unsigned int            index;
+
+    kernel_list_hash_size   = kernel_list_size*5;
+    snew(kernel_list_hash,kernel_list_hash_size);
+
+    for(i=0;i<kernel_list_hash_size;i++)
+    {
+        kernel_list_hash[i] = -1;
+    }
+    for(i=0;i<kernel_list_size;i++)
+    {
+        index=nb_kernel_hash_func(kernel_list[i].architecture,
+                                  kernel_list[i].electrostatics,
+                                  kernel_list[i].electrostatics_modifier,
+                                  kernel_list[i].vdw,
+                                  kernel_list[i].vdw_modifier,
+                                  kernel_list[i].geometry,
+                                  kernel_list[i].other,
+                                  kernel_list[i].vf) % kernel_list_hash_size;
+
+        /* Check for collisions and advance if necessary */
+        while( kernel_list_hash[index] != -1 )
+        {
+            index = (index+1) % kernel_list_hash_size;
+        }
+
+        kernel_list_hash[index] = i;
+    }
+    return 0;
+}
+
+void
+nb_kernel_list_hash_destroy()
+{
+    sfree(kernel_list_hash);
+    kernel_list_hash = NULL;
+    kernel_list_hash_size = 0;
+}
+
+
+nb_kernel_t *
+nb_kernel_list_findkernel(FILE *              log,
+                          const char *        arch,
+                          const char *        electrostatics,
+                          const char *        electrostatics_modifier,
+                          const char *        vdw,
+                          const char *        vdw_modifier,
+                          const char *        geometry,
+                          const char *        other,
+                          const char *        vf)
+{
+    int                 i;
+    unsigned int        index;
+    nb_kernel_info_t *  kernelinfo_ptr;
+
+    if(kernel_list_hash_size==0)
+    {
+        return NULL;
+    }
+
+    index=nb_kernel_hash_func(arch,
+                              electrostatics,
+                              electrostatics_modifier,
+                              vdw,
+                              vdw_modifier,
+                              geometry,
+                              other,
+                              vf) % kernel_list_hash_size;
+
+    kernelinfo_ptr = NULL;
+    while( (i=kernel_list_hash[index]) != -1)
+    {
+        if(!gmx_strcasecmp_min(kernel_list[i].architecture,arch) &&
+           !gmx_strcasecmp_min(kernel_list[i].electrostatics,electrostatics) &&
+           !gmx_strcasecmp_min(kernel_list[i].electrostatics_modifier,electrostatics_modifier) &&
+           !gmx_strcasecmp_min(kernel_list[i].vdw,vdw) &&
+           !gmx_strcasecmp_min(kernel_list[i].vdw_modifier,vdw_modifier) &&
+           !gmx_strcasecmp_min(kernel_list[i].geometry,geometry) &&
+           !gmx_strcasecmp_min(kernel_list[i].other,other) &&
+           !gmx_strcasecmp_min(kernel_list[i].vf,vf))
+        {
+            kernelinfo_ptr = kernel_list+i;
+            break;
+        }
+        index = (index+1) % kernel_list_hash_size;
+    }
+
+    if(log && kernelinfo_ptr!=NULL)
+    {
+        fprintf(log,
+                "NB kernel %s() with architecture '%s' used for neighborlist with\n"
+                "    Elec: '%s', Modifier: '%s'\n"
+                "    Vdw:  '%s', Modifier: '%s'\n"
+                "    Geom: '%s', Other: '%s', Calc: '%s'\n\n",
+                kernelinfo_ptr->kernelname,arch,electrostatics,electrostatics_modifier,
+                vdw,vdw_modifier,geometry,other,vf);
+    }
+
+    /* If we did not find any kernel the pointer will still be NULL */
+    return (kernelinfo_ptr !=NULL) ? kernelinfo_ptr->kernelptr : NULL;
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel.h b/src/gmxlib/nonbonded/nb_kernel.h

new file mode 100644 (file)

index 0000000..2830ec6
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel.h
@@ -0,0 +1,159 @@
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ * This file is part of GROMACS.
+ * Copyright (c) 2012-
+ *
+ * Written by the Gromacs development team under coordination of
+ * David van der Spoel, Berk Hess, and Erik Lindahl.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+#ifndef _nb_kernel_h_
+#define _nb_kernel_h_
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+} /* fixes auto-indentation problems */
+#endif
+
+
+#include "types/simple.h"
+#include "typedefs.h"
+
+/* Structure to collect kernel data not available in forcerec or mdatoms structures.
+ * This is only used inside the nonbonded module.
+ */
+typedef struct
+{
+    int                flags;
+    t_blocka *         exclusions;
+    real *             lambda;
+    real *             dvdl;
+
+    /* pointers to tables */
+    t_forcetable *     table_elec;
+    t_forcetable *     table_vdw;
+    t_forcetable *     table_elec_vdw;
+
+    /* potentials */
+    real *             energygrp_elec;
+    real *             energygrp_vdw;
+    real *             energygrp_polarization;
+}
+nb_kernel_data_t;
+
+
+typedef void
+nb_kernel_t(t_nblist *                nlist,
+            rvec *                    x,
+            rvec *                    f,
+            t_forcerec *              fr,
+            t_mdatoms *               mdatoms,
+            nb_kernel_data_t *        kernel_data,
+            t_nrnb *                  nrnb);
+
+
+/* Structure with a kernel pointer and settings. This cannot be abstract
+ * since we define the kernel list statically for each architecture in a header,
+ * and use it to set up the kernel hash functions to find kernels.
+ *
+ * The electrostatics/vdw names should be obvious and correspond to the
+ * forms of the interactions calculated in this function, and the interaction
+ * modifiers describe switch/shift and similar alterations. Geometry refers
+ * to whether this kernel calculates interactions between single particles or
+ * waters (groups of 3/4 atoms) for better performance. Finally, the VF string
+ * selects whether the kernel calculates only potential, only force, or both
+ *
+ * The allowed values for kernel interactions are described by the
+ * enumerated types gmx_nbkernel_elec and gmx_nbkernel_vdw (see types/enums.h).
+ * Note that these are deliberately NOT identical to the interactions the
+ * user can set, since some user-specified interactions will be tabulated, and
+ * Lennard-Jones and Buckingham use different kernels while their setting in
+ * the input is decided by nonbonded parameter formats rather than mdp options.
+ *
+ * The interaction modifiers are described by the eintmod enum type, while
+ * the kernel geometry is decided from the neighborlist geometry, which is
+ * described by the enum gmx_nblist_kernel_geometry (again, see types/enums.h).
+ * The 
+ *
+ * Note that any particular implementation of kernels might not support all of 
+ * these strings. In fact, some might not be supported by any architecture yet.
+ * The whole point of using strings and hashes is that we do not have to define a 
+ * unique set of strings in a single place. Thus, as long as you implement a
+ * corresponding kernel, you could in theory provide any string you want.
+ */
+typedef struct nb_kernel_info
+{
+    nb_kernel_t *   kernelptr;
+    const char *    kernelname;
+    const char *    architecture;     /* e.g. "C", "SSE", "BlueGene", etc. */
+
+    const char *    electrostatics;
+    const char *    electrostatics_modifier;
+    const char *    vdw;
+    const char *    vdw_modifier;
+    const char *    geometry;
+    const char *    other;  /* Any extra info you want/need to select a kernel */
+    const char *    vf;     /* "PotentialAndForce", "Potential", or "Force" */
+}
+nb_kernel_info_t;
+
+
+void
+nb_kernel_list_add_kernels(nb_kernel_info_t *   new_kernelinfo,
+                           int                  new_size);
+
+int
+nb_kernel_list_hash_init(void);
+
+/* Return a function pointer to the nonbonded kernel pointer with
+ * settings according to the text strings provided. GROMACS does not guarantee
+ * the existence of accelerated kernels for any combination, so the return value
+ * can be NULL.
+ * In that case, you can try a different/lower-level acceleration, and
+ * eventually you need to prepare to fall back to generic kernels or change
+ * your settings and try again.
+ *
+ * The names of the text strings are obviously meant to reflect settings in
+ * GROMACS, but inside this routine they are merely used as a set of text
+ * strings not defined here. The routine will simply compare the arguments with
+ * the contents of the corresponding strings in the nb_kernel_list_t structure.
+ *
+ * This function does not check whether the kernel in question can run on the
+ * present architecture since that would require a slow cpuid call for every
+ * single invocation.
+ */
+nb_kernel_t *
+nb_kernel_list_findkernel(FILE *              log,
+                          const char *        architecture,
+                          const char *        electrostatics,
+                          const char *        electrostatics_modifier,
+                          const char *        vdw,
+                          const char *        vdw_modifier,
+                          const char *        geometry,
+                          const char *        other,
+                          const char *        vf);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _nb_kernel_h_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c b/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c

index c18cad56d66bf1b6e34be4846c38ac024a643cf9..c5aa611c83579f057d350b6454070026871c0b48 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c
+++ b/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.c
@@ -36,7 +36,7 @@
  
  #include "types/nrnb.h"
  #include "nb_kernel_c_adress.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  /* Include standard kernel headers in local directory */
diff --git a/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h b/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h

index a8956bb4110eee2818d34cccac364c9173928e6a..29013393ced511ae790b59b9919925ff9868daa5 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h
+++ b/src/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel_c_adress.h
@@ -38,7 +38,7 @@
  
  #include <stdio.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  void
  nb_kernel_setup_adress(FILE *fplog,nb_adress_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.c b/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.c

index a5290c7efd49806c1d16d5bf506ba5d9e27da997..053aac3c23fee69bf38f2e6dab07ac8185d12bd9 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.c
+++ b/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.c
@@ -108,7 +108,7 @@
  
  
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  #include "nb_kernel_bluegene.h"
  
  
diff --git a/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.h b/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.h

index 4e3800bcf4d717a1218b502be62b0d4e9738b2c1..bcf14d6201a9fd4a9b6c71840a6fa8d1afbd00c5 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.h
+++ b/src/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_bluegene.h
@@ -28,7 +28,7 @@
  
  #include <types/simple.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  void
  nb_kernel_setup_bluegene(FILE *log,nb_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py b/src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py

new file mode 100755 (executable)

index 0000000..d491308
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/make_nb_kernel_c.py
@@ -0,0 +1,489 @@
+#!/usr/bin/python
+
+import sys
+import os
+sys.path.append ( "../preprocessor" )
+from gmxpreprocess import gmxpreprocess
+
+# "The happiest programs are programs that write other programs."
+#
+#
+# This script controls the generation of Gromacs nonbonded kernels.
+#
+# We no longer generate kernels on-the-fly, so this file is not run
+# during a Gromacs compile - only when we need to update the kernels (=rarely).
+#
+# To maximize performance, each combination of interactions in Gromacs
+# has a separate nonbonded kernel without conditionals in the code.
+# To avoid writing hundreds of different routines for each architecture,
+# we instead use a custom preprocessor so we can encode the conditionals
+# and expand for-loops (e.g, for water-water interactions)
+# from a general kernel template. While that file will contain quite a
+# few preprocessor directives, it is still an order of magnitude easier
+# to maintain than ~200 different kernels (not to mention it avoids bugs).
+#
+# To actually generate the kernels, this program iteratively calls the
+# preprocessor with different define settings corresponding to all
+# combinations of coulomb/van-der-Waals/geometry options.
+#
+# A main goal in the design was to make this new generator _general_. For
+# this reason we have used a lot of different fields to identify a particular
+# kernel and interaction. Basically, each kernel will have a name like
+#
+# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ()
+#
+# Where XX/YY/ZZ/VF are strings to identify what the kernel computes.
+#
+# Elec/Vdw describe the type of interaction for electrostatics and van der Waals.
+# The geometry settings correspond e.g. to water-water or water-particle kernels,
+# and finally the VF setting is V,F,or VF depending on whether we calculate
+# only the potential, only the force, or both of them. The final string (QQ)
+# is the architecture/language/optimization of the kernel.
+#
+Arch       = 'c'
+
+# Explanation of the 'properties':
+#
+# It is cheap to compute r^2, and the kernels require various other functions of r for
+# different kinds of interaction. Depending on the needs of the kernel and the available
+# processor instructions, this will be done in different ways.
+#
+# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2).
+# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2.
+# 'r' is similarly calculated as r^2*rinv when needed
+# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction
+# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff.
+# 'cutoff' means the interaction is set to 0.0 outside the cutoff
+#
+
+FileHeader = \
+'/*\n' \
+' * Note: this file was generated by the Gromacs '+Arch+' kernel generator.\n' \
+' *\n' \
+' *                This source code is part of\n' \
+' *\n' \
+' *                 G   R   O   M   A   C   S\n' \
+' *\n' \
+' * Copyright (c) 2001-2012, The GROMACS Development Team\n' \
+' *\n' \
+' * Gromacs is a library for molecular simulation and trajectory analysis,\n' \
+' * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for\n' \
+' * a full list of developers and information, check out http://www.gromacs.org\n' \
+' *\n' \
+' * This program is free software; you can redistribute it and/or modify it under\n' \
+' * the terms of the GNU Lesser General Public License as published by the Free\n' \
+' * Software Foundation; either version 2 of the License, or (at your option) any\n' \
+' * later version.\n' \
+' *\n' \
+' * To help fund GROMACS development, we humbly ask that you cite\n' \
+' * the papers people have written on it - you can find them on the website.\n' \
+' */\n'
+
+###############################################
+# ELECTROSTATICS
+# Interactions and flags for them
+###############################################
+ElectrostaticsList = {
+    'None'                    : [],
+    'Coulomb'                 : ['rinv','rinvsq'],
+    'ReactionField'           : ['rinv','rinvsq'],
+    'GeneralizedBorn'         : ['rinv','r'],
+    'CubicSplineTable'        : ['rinv','r','table'],
+    'Ewald'                   : ['rinv','rinvsq','r'],
+}
+
+
+###############################################
+# VAN DER WAALS
+# Interactions and flags for them
+###############################################
+VdwList = {
+    'None'                    : [],
+    'LennardJones'            : ['rinvsq'],
+    'Buckingham'              : ['rinv','rinvsq','r'],
+    'CubicSplineTable'        : ['rinv','r','table'],
+}
+
+
+###############################################
+# MODIFIERS
+# Different ways to adjust/modify interactions to conserve energy
+###############################################
+ModifierList = {
+    'None'                    : [],
+    'ExactCutoff'             : ['exactcutoff'],        # Zero the interaction outside the cutoff, used for reaction-field-zero
+    'PotentialShift'          : ['shift','exactcutoff'],
+    'PotentialSwitch'         : ['r','switch','exactcutoff']
+}
+
+
+###############################################
+# GEOMETRY COMBINATIONS
+###############################################
+GeometryNameList = [
+    [ 'Particle' , 'Particle' ],
+    [ 'Water3'   , 'Particle' ],
+    [ 'Water3'   , 'Water3'   ],     
+    [ 'Water4'   , 'Particle' ],
+    [ 'Water4'   , 'Water4'   ]
+]
+
+
+###############################################
+# POTENTIAL / FORCE
+###############################################
+VFList = [
+    'PotentialAndForce',
+# 'Potential',   # Not used yet
+    'Force'
+]
+
+
+###############################################
+# GEOMETRY PROPERTIES
+###############################################
+# Dictionaries with lists telling which interactions are present
+# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics!
+GeometryElectrostatics = {
+    'Particle'  : [ 0 ],
+    'Particle2' : [ 0 , 1 ],
+    'Particle3' : [ 0 , 1 , 2 ],
+    'Particle4' : [ 0 , 1 , 2 , 3 ],
+    'Water3'    : [ 0 , 1 , 2 ],
+    'Water4'    : [ 1 , 2 , 3 ]
+}
+
+GeometryVdw = {
+    'Particle'  : [ 0 ],
+    'Particle2' : [ 0 , 1 ],
+    'Particle3' : [ 0 , 1 , 2 ],
+    'Particle4' : [ 0 , 1 , 2 , 3 ],
+    'Water3'    : [ 0 ],
+    'Water4'    : [ 0 ]
+}
+
+
+
+
+# Dictionary to abbreviate all strings (mixed from all the lists)
+Abbreviation = {
+    'None'                    : 'None',
+    'Coulomb'                 : 'Coul',
+    'Ewald'                   : 'Ew',
+    'ReactionField'           : 'RF',
+    'GeneralizedBorn'         : 'GB',
+    'CubicSplineTable'        : 'CSTab',
+    'LennardJones'            : 'LJ',
+    'Buckingham'              : 'Bham',
+    'PotentialShift'          : 'Sh',
+    'PotentialSwitch'         : 'Sw',
+    'ExactCutoff'             : 'Cut',
+    'PotentialAndForce'       : 'VF',
+    'Potential'               : 'V',
+    'Force'                   : 'F',
+    'Water3'                  : 'W3',
+    'Water4'                  : 'W4',
+    'Particle'                : 'P1',
+    'Particle2'               : 'P2',
+    'Particle3'               : 'P3',
+    'Particle4'               : 'P4'
+}
+
+
+###############################################
+# Functions
+###############################################
+
+# Return a string with the kernel name from current settings
+def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom):
+    ElecStr = 'Elec' + Abbreviation[KernelElec]
+    if(KernelElecMod!='None'):
+        ElecStr = ElecStr + Abbreviation[KernelElecMod]
+    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
+    if(KernelVdwMod!='None'):
+        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
+    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
+    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch
+
+def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+    ElecStr = 'Elec' + Abbreviation[KernelElec]
+    if(KernelElecMod!='None'):
+        ElecStr = ElecStr + Abbreviation[KernelElecMod]
+    VdwStr  = 'Vdw'  + Abbreviation[KernelVdw]
+    if(KernelVdwMod!='None'):
+        VdwStr = VdwStr + Abbreviation[KernelVdwMod]
+    GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]]
+    VFStr   = Abbreviation[KernelVF]
+    return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch
+
+# Return a string with a declaration to use for the kernel;
+# this will be a sequence of string combinations as well as the actual function name
+# Dont worry about field widths - that is just pretty-printing for the header!
+def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF):
+    KernelStr   = '\"'+KernelName+'\"'
+    ArchStr     = '\"'+Arch+'\"'
+    ElecStr     = '\"'+KernelElec+'\"'
+    ElecModStr  = '\"'+KernelElecMod+'\"'
+    VdwStr      = '\"'+KernelVdw+'\"'
+    VdwModStr   = '\"'+KernelVdwMod+'\"'
+    GeomStr     = '\"'+KernelGeom[0]+KernelGeom[1]+'\"'
+    OtherStr    = '\"'+KernelOther+'\"'
+    VFStr       = '\"'+KernelVF+'\"'
+
+    ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr
+    ThisDecl = '    { '+KernelName+', '+KernelStr+', '+ThisSpec+' }'
+    return ThisDecl
+
+
+# Returns 1 if this kernel should be created, 0 if we should skip it
+# This routine is not critical - it is not the end of the world if we create more kernels,
+# but since the number is pretty large we save both space and compile-time by reducing it a bit.
+def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+
+    # No need for kernels without interactions
+    if(KernelElec=='None' and KernelVdw=='None'):
+        return 0
+
+    # No need for modifiers without interactions
+    if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')):
+        return 0
+
+    # No need for LJ-only water optimization, or water optimization with implicit solvent.
+    if('Water' in KernelGeom[0] and (KernelElec=='None' or 'GeneralizedBorn' in KernelElec)):
+        return 0
+
+    # Non-matching table settings are pointless
+    if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ):
+        return 0
+
+    # Try to reduce the number of different switch/shift options to get a reasonable number of kernels
+    # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift.
+    if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'):
+        return 0
+    if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'):
+        return 0
+    # For Vdw, we support switch and shift for Lennard-Jones/Buckingham
+    if((KernelVdwMod=='ExactCutoff') or
+       (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham'])):
+        return 0
+
+    # Choose either switch or shift and don't mix them...
+    if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or
+       (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')):
+        return 0
+
+    # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one
+    if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'):
+        return 0
+
+    # Don't use an electrostatics kernel with a modifier if the vdw one does not have one,
+    # unless the electrostatics one is reaction-field with exact cutoff.
+    if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'):
+        if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'):
+            return 0
+        elif(KernelElec!='ReactionField'):
+            return 0
+
+    return 1
+
+
+
+#
+# The preprocessor will automatically expand the interactions for water and other
+# geometries inside the kernel, but to get this right we need to setup a couple
+# of defines - we do them in a separate routine to keep the main loop clean.
+#
+# While this routine might look a bit complex it is actually quite straightforward,
+# and the best news is that you wont have to modify _anything_ for a new geometry
+# as long as you correctly define its Electrostatics/Vdw geometry in the lists above!
+#
+def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines):
+    # What is the _name_ for the i/j group geometry?
+    igeometry            = KernelGeom[0]
+    jgeometry            = KernelGeom[1]
+    # define so we can access it in the source when the preprocessor runs
+    defines['GEOMETRY_I'] = igeometry
+    defines['GEOMETRY_J'] = jgeometry
+
+    # For the i/j groups, extract a python list of which sites have electrostatics
+    # For SPC/TIP3p this will be [1,1,1], while TIP4p (no elec on first site) will be [0,1,1,1]
+    ielec                = GeometryElectrostatics[igeometry]
+    jelec                = GeometryElectrostatics[jgeometry]
+    # Zero out the corresponding lists in case we dont do Elec
+    if(KernelElec=='None'):
+        ielec = []
+        jelec = []
+
+    # Extract similar interaction lists for Vdw interactions (example for SPC: [1,0,0])
+    iVdw                 = GeometryVdw[igeometry]
+    jVdw                 = GeometryVdw[jgeometry]
+
+    # Zero out the corresponding lists in case we dont do Vdw
+    if(KernelVdw=='None'):
+        iVdw = []
+        jVdw = []
+
+    # iany[] and jany[] contains lists of the particles actually used (for interactions) in this kernel
+    iany = list(set(ielec+iVdw))  # convert to+from set to make elements unique
+    jany = list(set(jelec+jVdw))
+
+    defines['PARTICLES_ELEC_I'] = ielec
+    defines['PARTICLES_ELEC_J'] = jelec
+    defines['PARTICLES_VDW_I']  = iVdw
+    defines['PARTICLES_VDW_J']  = jVdw
+    defines['PARTICLES_I']      = iany
+    defines['PARTICLES_J']      = jany
+
+    # elecij,Vdwij are sets with pairs of particles for which the corresponding interaction is done
+    # (and anyij again corresponds to either electrostatics or Vdw)
+    elecij = []
+    Vdwij  = []
+    anyij  = []
+
+    for i in ielec:
+        for j in jelec:
+            elecij.append([i,j])
+
+    for i in iVdw:
+        for j in jVdw:
+            Vdwij.append([i,j])
+
+    for i in iany:
+        for j in jany:
+            if [i,j] in elecij or [i,j] in Vdwij:
+                anyij.append([i,j])
+
+    defines['PAIRS_IJ']     = anyij
+
+    # Make an 2d list-of-distance-properties-to-calculate for i,j
+    ni = max(iany)+1
+    nj = max(jany)+1
+    # Each element properties[i][j] is an empty list
+    properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ]
+    # Add properties to each set
+    for i in range(0,ni):
+        for j in range(0,nj):
+            if [i,j] in elecij:
+                properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod]
+            if [i,j] in Vdwij:
+                properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod]
+            # Add rinv if we need r
+            if 'r' in properties[i][j]:
+                properties[i][j] = properties[i][j] + ['rinv']
+            # Add rsq if we need rinv or rinsq
+            if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]:
+                properties[i][j] = properties[i][j] + ['rsq']
+
+    defines['INTERACTION_FLAGS']    = properties
+
+
+
+def PrintStatistics(ratio):
+    ratio = 100.0*ratio
+    print '\rGenerating %s nonbonded kernels... %5.1f%%' % (Arch,ratio),
+    sys.stdout.flush()
+
+
+
+defines = {}
+kerneldecl = []
+
+cnt     = 0.0
+nelec   = len(ElectrostaticsList)
+nVdw    = len(VdwList)
+nmod    = len(ModifierList)
+ngeom   = len(GeometryNameList)
+
+ntot    = nelec*nmod*nVdw*nmod*ngeom
+
+numKernels = 0
+
+fpdecl = open('nb_kernel_' + Arch + '.c','w')
+fpdecl.write( FileHeader )
+fpdecl.write( '#ifndef nb_kernel_' + Arch + '_h\n' )
+fpdecl.write( '#define nb_kernel_' + Arch + '_h\n\n' )
+fpdecl.write( '#include "../nb_kernel.h"\n\n' )
+
+for KernelElec in ElectrostaticsList:
+    defines['KERNEL_ELEC'] = KernelElec
+
+    for KernelElecMod in ModifierList:
+        defines['KERNEL_MOD_ELEC'] = KernelElecMod
+
+        for KernelVdw in VdwList:
+            defines['KERNEL_VDW'] = KernelVdw
+
+            for KernelVdwMod in ModifierList:
+                defines['KERNEL_MOD_VDW'] = KernelVdwMod
+
+                for KernelGeom in GeometryNameList:
+
+                    cnt += 1
+                    KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.c'
+                    fpkernel = open(KernelFilename,'w')
+                    defines['INCLUDE_HEADER'] = 1  # Include header first time in new file
+                    DoHeader = 1
+
+                    for KernelVF in VFList:
+
+                        KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF)
+
+                        defines['KERNEL_NAME'] = KernelName
+                        defines['KERNEL_VF']   = KernelVF
+
+                        # Check if this is a valid/sane/usable combination
+                        if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF):
+                            continue;
+
+                        # The overall kernel settings determine what the _kernel_ calculates, but for the water
+                        # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions.
+                        # This routine sets defines of what to calculate for each pair of particles in those cases.
+                        SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines)
+
+                        if(DoHeader==1):
+                            fpkernel.write( FileHeader )
+
+                        gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C')
+                        numKernels = numKernels + 1
+
+                        defines['INCLUDE_HEADER'] = 0   # Header has been included once now
+                        DoHeader=0
+
+                        # Append temp file contents to the common kernelfile
+                        fptmp = open(KernelName+'.tmp','r')
+                        fpkernel.writelines(fptmp.readlines())
+                        fptmp.close()
+                        os.remove(KernelName+'.tmp')
+
+                        # Add a declaration for this kernel
+                        fpdecl.write('nb_kernel_t ' + KernelName + ';\n');
+
+                        # Add declaration to the buffer
+                        KernelOther=''
+                        kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF))
+
+                    filesize = fpkernel.tell()
+                    fpkernel.close()
+                    if(filesize==0):
+                        os.remove(KernelFilename)
+
+                    PrintStatistics(cnt/ntot)
+                pass
+            pass
+        pass
+    pass
+pass
+
+# Write out the list of settings and corresponding kernels to the declaration file
+fpdecl.write( '\n\n' )
+fpdecl.write( 'nb_kernel_info_t\n' )
+fpdecl.write( 'kernellist_'+Arch+'[] =\n' )
+fpdecl.write( '{\n' )
+for decl in kerneldecl[0:-1]:
+    fpdecl.write( decl + ',\n' )
+fpdecl.write( kerneldecl[-1] + '\n' )
+fpdecl.write( '};\n\n' )
+fpdecl.write( 'int\n' )
+fpdecl.write( 'kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n\n')
+fpdecl.write( '#endif\n')
+fpdecl.close()
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c

deleted file mode 100644 (file)

index 15232b9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.c
+++ /dev/null
@@ -1,428 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel010.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel010
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel010(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel010nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel010nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 19 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h

deleted file mode 100644 (file)

index 0793977..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel010.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL010_H_
-#define _NBKERNEL010_H_
-
-/*! \file  nb_kernel010.h
- *  \brief Nonbonded kernel 010 (LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 010 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel010
-                (int *         nri,        int *         iinr,     
-                 int *         jindex,     int *         jjnr,   
-                 int *         shift,      real *        shiftvec,
-                 real *        fshift,     int *         gid, 
-                 real *        pos,        real *        faction,
-                 real *        charge,     real *        facel,
-                 real *        krf,        real *        crf,  
-                 real *        Vc,         int *         type,   
-                 int *         ntype,      real *        vdwparam,
-                 real *        Vvdw,       real *        tabscale,
-                 real *        VFtab,      real *        invsqrta, 
-                 real *        dvda,       real *        gbtabscale,
-                 real *        GBtab,      int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-/*! \brief Nonbonded kernel 010 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel010nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL010_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c

deleted file mode 100644 (file)

index c5fbdbc..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel020.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel020
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel020(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 61 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel020nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel020nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 48 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h

deleted file mode 100644 (file)

index 3b1c305..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel020.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL020_H_
-#define _NBKERNEL020_H_
-
-/*! \file  nb_kernel020.h
- *  \brief Nonbonded kernel 020 (Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 020 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel020
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 020 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel020nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL020_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c

deleted file mode 100644 (file)

index b3a0154..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel030.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel030
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel030(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 54 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel030nf
- * Coulomb interaction:     Not calculated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel030nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 4 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h

deleted file mode 100644 (file)

index d658d88..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel030.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL030_H_
-#define _NBKERNEL030_H_
-
-/*! \file  nb_kernel030.h
- *  \brief Nonbonded kernel 030 (Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 030 with forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel030
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 030 without forces.
- *
- *  <b>Coulomb interaction:</b> No <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel030nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL030_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c

deleted file mode 100644 (file)

index 746b21f..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel100.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel100
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel100(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 27 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel100nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel100nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 16 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h

deleted file mode 100644 (file)

index 56e6415..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel100.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL100_H_
-#define _NBKERNEL100_H_
-
-/*! \file  nb_kernel100.h
- *  \brief Nonbonded kernel 100 (Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 100 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel100
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 100 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel100nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL100_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c

deleted file mode 100644 (file)

index 9853826..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel101.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel101
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel101(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel101nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel101nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 47 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h

deleted file mode 100644 (file)

index 93bd41a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel101.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL101_H_
-#define _NBKERNEL101_H_
-
-/*! \file  nb_kernel101.h
- *  \brief Nonbonded kernel 101 (Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 101 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel101
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 101 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel101nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL101_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c

deleted file mode 100644 (file)

index f26ad66..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.c
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel102.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel102
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel102(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 234 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel102nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel102nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h

deleted file mode 100644 (file)

index 720b3b0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel102.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL102_H_
-#define _NBKERNEL102_H_
-
-/*! \file  nb_kernel102.h
- *  \brief Nonbonded kernel 102 (Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 102 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel102
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 102 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel102nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL102_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c

deleted file mode 100644 (file)

index 24932d0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel103.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel103
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel103(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel103nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel103nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 47 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h

deleted file mode 100644 (file)

index 23ffce0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel103.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL103_H_
-#define _NBKERNEL103_H_
-
-/*! \file  nb_kernel103.h
- *  \brief Nonbonded kernel 103 (Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 103 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel103
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 103 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel103nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL103_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c

deleted file mode 100644 (file)

index 6b1c528..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.c
+++ /dev/null
@@ -1,811 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel104.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel104
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel104(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 234 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel104nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel104nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h

deleted file mode 100644 (file)

index d7d2421..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel104.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL104_H_
-#define _NBKERNEL104_H_
-
-/*! \file  nb_kernel104.h
- *  \brief Nonbonded kernel 104 (Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 104 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel104
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 104 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel104nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL104_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c

deleted file mode 100644 (file)

index 6cada4e..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.c
+++ /dev/null
@@ -1,449 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel110.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel110
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel110(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 38 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel110nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel110nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 24 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h

deleted file mode 100644 (file)

index b56751d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel110.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL110_H_
-#define _NBKERNEL110_H_
-
-/*! \file  nb_kernel110.h
- *  \brief Nonbonded kernel 110 (Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 110 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel110
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 110 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel110nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL110_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c

deleted file mode 100644 (file)

index d8df08c..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel111.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel111
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel111(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 91 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel111nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel111nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 55 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h

deleted file mode 100644 (file)

index dbf916a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel111.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL111_H_
-#define _NBKERNEL111_H_
-
-/*! \file  nb_kernel111.h
- *  \brief Nonbonded kernel 111 (Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 111 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel111
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 111 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel111nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL111_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c

deleted file mode 100644 (file)

index 6948a9b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.c
+++ /dev/null
@@ -1,845 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel112.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel112
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel112(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (vcoul+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 245 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel112nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel112nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 143 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h

deleted file mode 100644 (file)

index c67acbd..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel112.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL112_H_
-#define _NBKERNEL112_H_
-
-/*! \file  nb_kernel112.h
- *  \brief Nonbonded kernel 112 (Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 112 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel112
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 112 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel112nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL112_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c

deleted file mode 100644 (file)

index ce29bbe..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.c
+++ /dev/null
@@ -1,616 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel113.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel113
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel113(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 113 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel113nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel113nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 66 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h

deleted file mode 100644 (file)

index 415c154..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel113.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL113_H_
-#define _NBKERNEL113_H_
-
-/*! \file  nb_kernel113.h
- *  \brief Nonbonded kernel 113 (Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 113 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel113
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 113 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel113nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL113_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c

deleted file mode 100644 (file)

index 2371187..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.c
+++ /dev/null
@@ -1,898 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel114.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel114
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel114(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 267 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel114nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel114nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 154 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h

deleted file mode 100644 (file)

index 4605e88..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel114.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL114_H_
-#define _NBKERNEL114_H_
-
-/*! \file  nb_kernel114.h
- *  \brief Nonbonded kernel 114 (Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 114 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel114
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 114 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel114nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL114_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c

deleted file mode 100644 (file)

index f43e397..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.c
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel120.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel120
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel120(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel120nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel120nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 51 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h

deleted file mode 100644 (file)

index c1f04c3..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel120.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL120_H_
-#define _NBKERNEL120_H_
-
-/*! \file  nb_kernel120.h
- *  \brief Nonbonded kernel 120 (Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 120 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel120
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 120 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel120nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL120_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c

deleted file mode 100644 (file)

index 22e4fe1..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel121.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel121
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel121(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 117 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel121nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel121nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 82 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h

deleted file mode 100644 (file)

index 53a80bb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel121.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL121_H_
-#define _NBKERNEL121_H_
-
-/*! \file  nb_kernel121.h
- *  \brief Nonbonded kernel 121 (Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 121 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel121
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 121 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel121nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL121_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c

deleted file mode 100644 (file)

index ad8d288..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.c
+++ /dev/null
@@ -1,849 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel122.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel122
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel122(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (vcoul+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 271 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel122nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel122nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 170 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h

deleted file mode 100644 (file)

index f2c4f50..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel122.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL122_H_
-#define _NBKERNEL122_H_
-
-/*! \file  nb_kernel122.h
- *  \brief Nonbonded kernel 122 (Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 122 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel122
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 122 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel122nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL122_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c

deleted file mode 100644 (file)

index 5ddfe4c..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel123.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel123
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel123(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 141 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel123nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel123nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 95 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h

deleted file mode 100644 (file)

index 7dc75c9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel123.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL123_H_
-#define _NBKERNEL123_H_
-
-/*! \file  nb_kernel123.h
- *  \brief Nonbonded kernel 123 (Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 123 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel123
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 123 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel123nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL123_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c

deleted file mode 100644 (file)

index 1623be5..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel124.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel124
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel124(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 295 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel124nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel124nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 183 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h

deleted file mode 100644 (file)

index aa99f5e..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel124.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL124_H_
-#define _NBKERNEL124_H_
-
-/*! \file  nb_kernel124.h
- *  \brief Nonbonded kernel 124 (Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 124 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel124
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 124 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel124nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL124_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c

deleted file mode 100644 (file)

index a69b3ce..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.c
+++ /dev/null
@@ -1,505 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel130.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel130
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel130(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 59 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel130nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel130nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 36 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h

deleted file mode 100644 (file)

index 630e05d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel130.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL130_H_
-#define _NBKERNEL130_H_
-
-/*! \file  nb_kernel130.h
- *  \brief Nonbonded kernel 130 (Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 130 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel130
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 130 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel130nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL130_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c

deleted file mode 100644 (file)

index f6c4539..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.c
+++ /dev/null
@@ -1,627 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel131.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel131
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel131(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 112 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel131nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel131nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 67 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h

deleted file mode 100644 (file)

index 361beb2..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel131.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL131_H_
-#define _NBKERNEL131_H_
-
-/*! \file  nb_kernel131.h
- *  \brief Nonbonded kernel 131 (Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 131 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel131
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 131 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel131nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL131_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c

deleted file mode 100644 (file)

index ba94c59..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.c
+++ /dev/null
@@ -1,901 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel132.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel132
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel132(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (vcoul)*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 266 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel132nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel132nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv11;      
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv12;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv13;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 155 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h

deleted file mode 100644 (file)

index 939ea4b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel132.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL132_H_
-#define _NBKERNEL132_H_
-
-/*! \file  nb_kernel132.h
- *  \brief Nonbonded kernel 132 (Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 132 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel132
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 132 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel132nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL132_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c

deleted file mode 100644 (file)

index 1c42315..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.c
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel133.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel133
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel133(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 134 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel133nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel133nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv21;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv31;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv41;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h

deleted file mode 100644 (file)

index 8cd04e5..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel133.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL133_H_
-#define _NBKERNEL133_H_
-
-/*! \file  nb_kernel133.h
- *  \brief Nonbonded kernel 133 (Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 133 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel133
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 133 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel133nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL133_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c

deleted file mode 100644 (file)

index b2e42cb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.c
+++ /dev/null
@@ -1,955 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel134.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel134
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel134(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif 
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-                fscal            = (vcoul)*rinvsq; 
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel134nf
- * Coulomb interaction:     Normal Coulomb
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel134nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv22;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv23;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv24;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv32;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv33;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv34;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv42;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv43;      
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb interaction */
-                vcoul            = qq*rinv44;      
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 168 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h

deleted file mode 100644 (file)

index e60fa9a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel134.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL134_H_
-#define _NBKERNEL134_H_
-
-/*! \file  nb_kernel134.h
- *  \brief Nonbonded kernel 134 (Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 134 with forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel134
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 134 without forces.
- *
- *  <b>Coulomb interaction:</b> Standard 1/r <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel134nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL134_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c

deleted file mode 100644 (file)

index ad29fdc..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel200.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel200
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel200(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 33 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel200nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel200nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 19 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h

deleted file mode 100644 (file)

index c7307f2..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel200.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL200_H_
-#define _NBKERNEL200_H_
-
-/*! \file  nb_kernel200.h
- *  \brief Nonbonded kernel 200 (RF Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 200 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel200
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 200 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel200nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL200_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c

deleted file mode 100644 (file)

index 8d3517f..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel201.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel201
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel201(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 98 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel201nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel201nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 56 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h

deleted file mode 100644 (file)

index 5bb8a84..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel201.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL201_H_
-#define _NBKERNEL201_H_
-
-/*! \file  nb_kernel201.h
- *  \brief Nonbonded kernel 201 (RF Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 201 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel201
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 201 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel201nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL201_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c

deleted file mode 100644 (file)

index 197282f..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel202.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel202
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel202(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel202nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel202nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 162 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h

deleted file mode 100644 (file)

index 7b3203d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel202.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL202_H_
-#define _NBKERNEL202_H_
-
-/*! \file  nb_kernel202.h
- *  \brief Nonbonded kernel 202 (RF Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 202 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel202
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 202 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel202nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL202_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c

deleted file mode 100644 (file)

index 31d6294..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel203.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel203
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel203(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 98 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel203nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel203nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 56 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h

deleted file mode 100644 (file)

index 1c7dafc..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel203.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL203_H_
-#define _NBKERNEL203_H_
-
-/*! \file  nb_kernel203.h
- *  \brief Nonbonded kernel 203 (RF Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 203 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel203
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 203 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel203nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL203_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c

deleted file mode 100644 (file)

index e9ba676..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.c
+++ /dev/null
@@ -1,831 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel204.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel204
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel204(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 288 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel204nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel204nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          krsq;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 162 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h

deleted file mode 100644 (file)

index 0ee1497..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel204.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL204_H_
-#define _NBKERNEL204_H_
-
-/*! \file  nb_kernel204.h
- *  \brief Nonbonded kernel 204 (RF Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 204 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel204
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 204 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel204nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL204_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c

deleted file mode 100644 (file)

index f9db17a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.c
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel210.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel210
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel210(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 44 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel210nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel210nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 27 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h

deleted file mode 100644 (file)

index 97e4272..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel210.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL210_H_
-#define _NBKERNEL210_H_
-
-/*! \file  nb_kernel210.h
- *  \brief Nonbonded kernel 210 (RF Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 210 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel210
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 210 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel210nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL210_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c

deleted file mode 100644 (file)

index 98120c1..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.c
+++ /dev/null
@@ -1,579 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel211.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel211
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel211(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 109 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel211nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel211nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h

deleted file mode 100644 (file)

index fb3ceb3..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel211.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL211_H_
-#define _NBKERNEL211_H_
-
-/*! \file  nb_kernel211.h
- *  \brief Nonbonded kernel 211 (RF Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 211 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel211
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 211 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel211nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL211_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c

deleted file mode 100644 (file)

index 79717cb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.c
+++ /dev/null
@@ -1,865 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel212.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel212
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel212(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 299 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel212nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel212nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 170 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h

deleted file mode 100644 (file)

index 2cb3689..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel212.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL212_H_
-#define _NBKERNEL212_H_
-
-/*! \file  nb_kernel212.h
- *  \brief Nonbonded kernel 212 (RF Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 212 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel212
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 212 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel212nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL212_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c

deleted file mode 100644 (file)

index 398af73..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.c
+++ /dev/null
@@ -1,624 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel213.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel213
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel213(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 131 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel213nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel213nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 75 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h

deleted file mode 100644 (file)

index 9f26dfd..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel213.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL213_H_
-#define _NBKERNEL213_H_
-
-/*! \file  nb_kernel213.h
- *  \brief Nonbonded kernel 213 (RF Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 213 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel213
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 213 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel213nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL213_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c

deleted file mode 100644 (file)

index 8d604bc..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.c
+++ /dev/null
@@ -1,918 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel214.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel214
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel214(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 321 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel214nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel214nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 181 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h

deleted file mode 100644 (file)

index 246665d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel214.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL214_H_
-#define _NBKERNEL214_H_
-
-/*! \file  nb_kernel214.h
- *  \brief Nonbonded kernel 214 (RF Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 214 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel214
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 214 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel214nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL214_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c

deleted file mode 100644 (file)

index b1a366b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.c
+++ /dev/null
@@ -1,457 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel220.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel220
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel220(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 70 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel220nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel220nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 54 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h

deleted file mode 100644 (file)

index a1a443c..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel220.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL220_H_
-#define _NBKERNEL220_H_
-
-/*! \file  nb_kernel220.h
- *  \brief Nonbonded kernel 220 (RF Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 220 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel220
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 220 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel220nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL220_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c

deleted file mode 100644 (file)

index d9a5348..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel221.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel221
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel221(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 135 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel221nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel221nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 91 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h

deleted file mode 100644 (file)

index e6d7b54..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel221.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL221_H_
-#define _NBKERNEL221_H_
-
-/*! \file  nb_kernel221.h
- *  \brief Nonbonded kernel 221 (RF Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 221 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel221
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 221 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel221nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL221_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c

deleted file mode 100644 (file)

index 1b33bda..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.c
+++ /dev/null
@@ -1,869 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel222.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel222
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel222(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (qq*(rinv11-2.0*krsq)+br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 325 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel222nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel222nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 197 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h

deleted file mode 100644 (file)

index 16ae5c9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel222.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL222_H_
-#define _NBKERNEL222_H_
-
-/*! \file  nb_kernel222.h
- *  \brief Nonbonded kernel 222 (RF Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 222 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel222
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 222 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel222nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL222_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c

deleted file mode 100644 (file)

index 6008257..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.c
+++ /dev/null
@@ -1,630 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel223.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel223
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel223(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 159 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel223nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel223nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 104 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h

deleted file mode 100644 (file)

index ee732ca..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel223.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL223_H_
-#define _NBKERNEL223_H_
-
-/*! \file  nb_kernel223.h
- *  \brief Nonbonded kernel 223 (RF Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 223 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel223
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 223 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel223nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL223_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c

deleted file mode 100644 (file)

index c25a268..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.c
+++ /dev/null
@@ -1,924 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel224.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel224
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel224(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 349 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel224nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel224nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          krsq;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 210 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h

deleted file mode 100644 (file)

index 3be3bc5..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel224.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL224_H_
-#define _NBKERNEL224_H_
-
-/*! \file  nb_kernel224.h
- *  \brief Nonbonded kernel 224 (RF Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 224 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel224
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 224 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel224nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL224_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c

deleted file mode 100644 (file)

index 88877fb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel230.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel230
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel230(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 65 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel230nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel230nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 39 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h

deleted file mode 100644 (file)

index e3f352c..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel230.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL230_H_
-#define _NBKERNEL230_H_
-
-/*! \file  nb_kernel230.h
- *  \brief Nonbonded kernel 230 (RF Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 230 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel230
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 230 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel230nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL230_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c

deleted file mode 100644 (file)

index f7f226e..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.c
+++ /dev/null
@@ -1,635 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel231.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel231
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel231(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 130 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel231nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel231nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 76 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h

deleted file mode 100644 (file)

index 7d4f88d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel231.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL231_H_
-#define _NBKERNEL231_H_
-
-/*! \file  nb_kernel231.h
- *  \brief Nonbonded kernel 231 (RF Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 231 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel231
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 231 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel231nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL231_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c

deleted file mode 100644 (file)

index 03b8370..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.c
+++ /dev/null
@@ -1,921 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel232.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel232
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel232(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = (qq*(rinv11-2.0*krsq))*rinvsq-((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv12*rinv12;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv12-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv13*rinv13;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv13-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 320 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel232nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel232nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq11;      
-                vcoul            = qq*(rinv11+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq12;      
-                vcoul            = qq*(rinv12+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq13;      
-                vcoul            = qq*(rinv13+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 182 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h

deleted file mode 100644 (file)

index e21236a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel232.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL232_H_
-#define _NBKERNEL232_H_
-
-/*! \file  nb_kernel232.h
- *  \brief Nonbonded kernel 232 (RF Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 232 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel232
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 232 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel232nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL232_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c

deleted file mode 100644 (file)

index 2c85ac8..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.c
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel233.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel233
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel233(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-                rinvsq           = rinv21*rinv21;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv21-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv31*rinv31;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv31-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-                rinvsq           = rinv41*rinv41;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv41-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 152 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel233nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel233nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq21;      
-                vcoul            = qq*(rinv21+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq31;      
-                vcoul            = qq*(rinv31+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq41;      
-                vcoul            = qq*(rinv41+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 89 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h

deleted file mode 100644 (file)

index 59001f9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel233.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL233_H_
-#define _NBKERNEL233_H_
-
-/*! \file  nb_kernel233.h
- *  \brief Nonbonded kernel 233 (RF Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 233 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel233
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 233 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel233nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL233_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c

deleted file mode 100644 (file)

index 9d2ddf3..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.c
+++ /dev/null
@@ -1,975 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel234.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel234
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel234(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijD,fijR;
-    real          krsq;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv22*rinv22;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv22-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv23*rinv23;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv23-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv24*rinv24;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv24-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv32*rinv32;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv32-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-                rinvsq           = rinv33*rinv33;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv33-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv34*rinv34;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv34-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv42*rinv42;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv42-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-                rinvsq           = rinv43*rinv43;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv43-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-                rinvsq           = rinv44*rinv44;  
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-                fscal            = (qq*(rinv44-2.0*krsq))*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 342 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel234nf
- * Coulomb interaction:     Reaction field
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel234nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          krsq;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq22;      
-                vcoul            = qq*(rinv22+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq23;      
-                vcoul            = qq*(rinv23+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq24;      
-                vcoul            = qq*(rinv24+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq32;      
-                vcoul            = qq*(rinv32+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq33;      
-                vcoul            = qq*(rinv33+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq34;      
-                vcoul            = qq*(rinv34+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq42;      
-                vcoul            = qq*(rinv42+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq43;      
-                vcoul            = qq*(rinv43+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Coulomb reaction-field interaction */
-                krsq             = krf*rsq44;      
-                vcoul            = qq*(rinv44+krsq-crf);
-                vctot            = vctot+vcoul;    
-
-                /* Inner loop uses 195 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h

deleted file mode 100644 (file)

index 27f3f5b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel234.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL234_H_
-#define _NBKERNEL234_H_
-
-/*! \file  nb_kernel234.h
- *  \brief Nonbonded kernel 234 (RF Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 234 with forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel234
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 234 without forces.
- *
- *  <b>Coulomb interaction:</b> Reaction-Field <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel234nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL234_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c

deleted file mode 100644 (file)

index 4cbeac2..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel300.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel300
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel300(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 42 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel300nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel300nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 26 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h

deleted file mode 100644 (file)

index 99516e7..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel300.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL300_H_
-#define _NBKERNEL300_H_
-
-/*! \file  nb_kernel300.h
- *  \brief Nonbonded kernel 300 (Tab Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 300 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel300
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 300 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel300nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL300_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c

deleted file mode 100644 (file)

index da8de57..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.c
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel301.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel301
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel301(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel301nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel301nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 77 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h

deleted file mode 100644 (file)

index c28ace6..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel301.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL301_H_
-#define _NBKERNEL301_H_
-
-/*! \file  nb_kernel301.h
- *  \brief Nonbonded kernel 301 (Tab Coul, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 301 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel301
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 301 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel301nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL301_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c

deleted file mode 100644 (file)

index 83ee5df..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel302.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel302
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel302(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 369 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel302nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel302nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 225 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h

deleted file mode 100644 (file)

index b99d307..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel302.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL302_H_
-#define _NBKERNEL302_H_
-
-/*! \file  nb_kernel302.h
- *  \brief Nonbonded kernel 302 (Tab Coul, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 302 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel302
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 302 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel302nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL302_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c

deleted file mode 100644 (file)

index d1a7e91..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.c
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel303.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel303
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel303(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel303nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel303nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 77 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h

deleted file mode 100644 (file)

index df17ff6..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel303.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL303_H_
-#define _NBKERNEL303_H_
-
-/*! \file  nb_kernel303.h
- *  \brief Nonbonded kernel 303 (Tab Coul, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 303 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel303
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 303 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel303nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL303_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c

deleted file mode 100644 (file)

index 9c842d0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel304.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel304
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel304(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 369 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 28 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel304nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Not calculated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel304nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 225 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 10 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h

deleted file mode 100644 (file)

index cf5182c..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel304.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL304_H_
-#define _NBKERNEL304_H_
-
-/*! \file  nb_kernel304.h
- *  \brief Nonbonded kernel 304 (Tab Coul, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 304 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel304
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 304 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel304nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL304_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c

deleted file mode 100644 (file)

index d9bac05..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel310.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel310
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel310(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 55 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel310nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel310nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 34 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h

deleted file mode 100644 (file)

index 7341359..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel310.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL310_H_
-#define _NBKERNEL310_H_
-
-/*! \file  nb_kernel310.h
- *  \brief Nonbonded kernel 310 (Tab Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 310 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel310
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 310 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel310nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL310_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c

deleted file mode 100644 (file)

index a046fe3..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.c
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel311.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel311
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel311(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 138 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel311nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel311nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 85 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h

deleted file mode 100644 (file)

index 140691a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel311.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL311_H_
-#define _NBKERNEL311_H_
-
-/*! \file  nb_kernel311.h
- *  \brief Nonbonded kernel 311 (Tab Coul + LJ, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 311 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel311
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 311 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel311nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL311_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c

deleted file mode 100644 (file)

index 623b323..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.c
+++ /dev/null
@@ -1,1151 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel312.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel312
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel312(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 382 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel312nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel312nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 233 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h

deleted file mode 100644 (file)

index e5849eb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel312.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL312_H_
-#define _NBKERNEL312_H_
-
-/*! \file  nb_kernel312.h
- *  \brief Nonbonded kernel 312 (Tab Coul + LJ, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" 
-{
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 312 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel312
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 312 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel312nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL312_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c

deleted file mode 100644 (file)

index c36a282..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.c
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel313.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel313
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel313(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 158 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel313nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel313nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-               /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 96 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h

deleted file mode 100644 (file)

index 63005e6..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel313.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL313_H_
-#define _NBKERNEL313_H_
-
-/*! \file  nb_kernel313.h
- *  \brief Nonbonded kernel 313 (Tab Coul + LJ, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 313 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel313
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 313 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel313nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL313_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c

deleted file mode 100644 (file)

index 2116e24..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.c
+++ /dev/null
@@ -1,1203 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel314.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel314
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel314(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 402 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel314nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Lennard-Jones
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel314nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinvsq           = 1.0/rsq11;      
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 244 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h

deleted file mode 100644 (file)

index c783dfb..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel314.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL314_H_
-#define _NBKERNEL314_H_
-
-/*! \file  nb_kernel314.h
- *  \brief Nonbonded kernel 314 (Tab Coul + LJ, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 314 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel314
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 314 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel314nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL314_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c

deleted file mode 100644 (file)

index d5c7179..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel320.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel320
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel320(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 81 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel320nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel320nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 61 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h

deleted file mode 100644 (file)

index fa4c8e0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel320.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL320_H_
-#define _NBKERNEL320_H_
-
-/*! \file  nb_kernel320.h
- *  \brief Nonbonded kernel 320 (Tab Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 320 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel320
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 320 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel320nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL320_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c

deleted file mode 100644 (file)

index 042786e..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.c
+++ /dev/null
@@ -1,683 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel321.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel321
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel321(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 164 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel321nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel321nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 112 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h

deleted file mode 100644 (file)

index 076266b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel321.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL321_H_
-#define _NBKERNEL321_H_
-
-/*! \file  nb_kernel321.h
- *  \brief Nonbonded kernel 321 (Tab Coul + Bham, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 321 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel321
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 321 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel321nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL321_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c

deleted file mode 100644 (file)

index 874c797..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.c
+++ /dev/null
@@ -1,1155 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel322.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel322
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel322(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-((fijC)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 408 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel322nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel322nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-                rinvsq           = rinv11*rinv11;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 260 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h

deleted file mode 100644 (file)

index 46baec9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel322.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL322_H_
-#define _NBKERNEL322_H_
-
-/*! \file  nb_kernel322.h
- *  \brief Nonbonded kernel 322 (Tab Coul + Bham, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 322 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel322
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 322 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel322nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL322_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c

deleted file mode 100644 (file)

index da9d900..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.c
+++ /dev/null
@@ -1,729 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel323.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel323
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel323(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 186 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel323nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel323nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 3*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 125 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h

deleted file mode 100644 (file)

index 0d52978..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel323.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL323_H_
-#define _NBKERNEL323_H_
-
-/*! \file  nb_kernel323.h
- *  \brief Nonbonded kernel 323 (Tab Coul + Bham, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 323 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel323
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 323 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel323nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL323_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c

deleted file mode 100644 (file)

index 6cfa305..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.c
+++ /dev/null
@@ -1,1209 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel324.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel324
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel324(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 430 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel324nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Buckingham
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel324nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 3*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    cexp1            = vdwparam[tj+1]; 
-    cexp2            = vdwparam[tj+2]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-                rinvsq           = rinv11*rinv11;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 273 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h

deleted file mode 100644 (file)

index d5585b7..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel324.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL324_H_
-#define _NBKERNEL324_H_
-
-/*! \file  nb_kernel324.h
- *  \brief Nonbonded kernel 324 (Tab Coul + Bham, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 324 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel324
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 324 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel324nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL324_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c

deleted file mode 100644 (file)

index e88c093..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel330.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel330
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel330(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 68 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel330nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel330nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                qq               = iq*charge[jnr]; 
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 42 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h

deleted file mode 100644 (file)

index 71813c6..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel330.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL330_H_
-#define _NBKERNEL330_H_
-
-/*! \file  nb_kernel330.h
- *  \brief Nonbonded kernel 330 (Tab Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 330 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel330
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 330 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel330nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL330_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c

deleted file mode 100644 (file)

index 679ba80..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.c
+++ /dev/null
@@ -1,708 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel331.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel331
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel331(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 151 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel331nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      SPC/TIP3P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel331nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          qO,qH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = facel*charge[ii];
-    qH               = facel*charge[ii+1];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qO*jq;          
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 93 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h

deleted file mode 100644 (file)

index 51a52ee..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel331.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL331_H_
-#define _NBKERNEL331_H_
-
-/*! \file  nb_kernel331.h
- *  \brief Nonbonded kernel 331 (Tab Coul + Tab VdW, SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 331 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel331
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 331 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel331nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL331_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c

deleted file mode 100644 (file)

index 746e49d..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.c
+++ /dev/null
@@ -1,1180 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel332.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel332
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        yes
- */
-void nb_kernel332(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv12;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx12;     
-                ty               = fscal*dy12;     
-                tz               = fscal*dz12;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv13;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx13;     
-                ty               = fscal*dy13;     
-                tz               = fscal*dz13;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Inner loop uses 395 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 29 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel332nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of SPC/TIP3P interactions
- * Calculate forces:        no
- */
-void nb_kernel332nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx12,dy12,dz12,rsq12,rinv12;
-    real          dx13,dy13,dz13,rsq13,rinv13;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          qO,qH,qqOO,qqOH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qO               = charge[ii];     
-    qH               = charge[ii+1];   
-    qqOO             = facel*qO*qO;    
-    qqOH             = facel*qO*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx12             = ix1 - jx2;      
-                dy12             = iy1 - jy2;      
-                dz12             = iz1 - jz2;      
-                rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
-                dx13             = ix1 - jx3;      
-                dy13             = iy1 - jy3;      
-                dz13             = iz1 - jz3;      
-                rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv12           = gmx_invsqrt(rsq12);
-                rinv13           = gmx_invsqrt(rsq13);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-
-                /* Load parameters for j atom */
-                qq               = qqOO;           
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq12*rinv12;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq13*rinv13;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqOH;           
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 241 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 11 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h

deleted file mode 100644 (file)

index 89bb6e9..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel332.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL332_H_
-#define _NBKERNEL332_H_
-
-/*! \file  nb_kernel332.h
- *  \brief Nonbonded kernel 332 (Tab Coul + Tab VdW, SPC-SPC)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 332 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel332
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 332 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> SPC - SPC <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel332nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL332_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c

deleted file mode 100644 (file)

index 4b9cc4f..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.c
+++ /dev/null
@@ -1,774 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel333.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel333
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        yes
- */
-void nb_kernel333(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1,fjx1,fjy1,fjz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = faction[j3+0] - tx;
-                fjy1             = faction[j3+1] - ty;
-                fjz1             = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv21;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx21;     
-                ty               = fscal*dy21;     
-                tz               = fscal*dz21;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv31;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx31;     
-                ty               = fscal*dy31;     
-                tz               = fscal*dz31;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx1             = fjx1 - tx;      
-                fjy1             = fjy1 - ty;      
-                fjz1             = fjz1 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv41;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx41;     
-                ty               = fscal*dy41;     
-                tz               = fscal*dz41;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = fjx1 - tx;      
-                faction[j3+1]    = fjy1 - ty;      
-                faction[j3+2]    = fjz1 - tz;      
-
-                /* Inner loop uses 179 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel333nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      TIP4P - other atoms
- * Calculate forces:        no
- */
-void nb_kernel333nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          jq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx21,dy21,dz21,rsq21,rinv21;
-    real          dx31,dy31,dz31,rsq31,rinv31;
-    real          dx41,dy41,dz41,rsq41,rinv41;
-    real          qH,qM;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = facel*charge[ii+1];
-    qM               = facel*charge[ii+3];
-    nti              = 2*ntype*type[ii];
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx21             = ix2 - jx1;      
-                dy21             = iy2 - jy1;      
-                dz21             = iz2 - jz1;      
-                rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
-                dx31             = ix3 - jx1;      
-                dy31             = iy3 - jy1;      
-                dz31             = iz3 - jz1;      
-                rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
-                dx41             = ix4 - jx1;      
-                dy41             = iy4 - jy1;      
-                dz41             = iz4 - jz1;      
-                rsq41            = dx41*dx41+dy41*dy41+dz41*dz41;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv21           = gmx_invsqrt(rsq21);
-                rinv31           = gmx_invsqrt(rsq31);
-                rinv41           = gmx_invsqrt(rsq41);
-
-                /* Load parameters for j atom */
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                jq               = charge[jnr+0];  
-                qq               = qH*jq;          
-
-                /* Calculate table index */
-                r                = rsq21*rinv21;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq31*rinv31;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qM*jq;          
-
-                /* Calculate table index */
-                r                = rsq41*rinv41;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 110 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h

deleted file mode 100644 (file)

index 80c6903..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel333.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL333_H_
-#define _NBKERNEL333_H_
-
-/*! \file  nb_kernel333.h
- *  \brief Nonbonded kernel 333 (Tab Coul + Tab VdW, TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 333 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel333
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 333 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - other atoms <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel333nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL333_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c

deleted file mode 100644 (file)

index 6db5c8b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.c
+++ /dev/null
@@ -1,1254 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-#include "nb_kernel334.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel334
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        yes
- */
-void nb_kernel334(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          ix2,iy2,iz2,fix2,fiy2,fiz2;
-    real          ix3,iy3,iz3,fix3,fiy3,fiz3;
-    real          ix4,iy4,iz4,fix4,fiy4,fiz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2,fjx2,fjy2,fjz2;
-    real          jx3,jy3,jz3,fjx3,fjy3,fjz3;
-    real          jx4,jy4,jz4,fjx4,fjy4,fjz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            fix2             = 0;              
-            fiy2             = 0;              
-            fiz2             = 0;              
-            fix3             = 0;              
-            fiy3             = 0;              
-            fiz3             = 0;              
-            fix4             = 0;              
-            fiy4             = 0;              
-            fiz4             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv22;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx22;     
-                ty               = fscal*dy22;     
-                tz               = fscal*dz22;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = faction[j3+3] - tx;
-                fjy2             = faction[j3+4] - ty;
-                fjz2             = faction[j3+5] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv23;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx23;     
-                ty               = fscal*dy23;     
-                tz               = fscal*dz23;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = faction[j3+6] - tx;
-                fjy3             = faction[j3+7] - ty;
-                fjz3             = faction[j3+8] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv24;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx24;     
-                ty               = fscal*dy24;     
-                tz               = fscal*dz24;     
-
-                /* Increment i atom force */
-                fix2             = fix2 + tx;      
-                fiy2             = fiy2 + ty;      
-                fiz2             = fiz2 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = faction[j3+9] - tx;
-                fjy4             = faction[j3+10] - ty;
-                fjz4             = faction[j3+11] - tz;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv32;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx32;     
-                ty               = fscal*dy32;     
-                tz               = fscal*dz32;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx2             = fjx2 - tx;      
-                fjy2             = fjy2 - ty;      
-                fjz2             = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv33;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx33;     
-                ty               = fscal*dy33;     
-                tz               = fscal*dz33;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx3             = fjx3 - tx;      
-                fjy3             = fjy3 - ty;      
-                fjz3             = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv34;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx34;     
-                ty               = fscal*dy34;     
-                tz               = fscal*dz34;     
-
-                /* Increment i atom force */
-                fix3             = fix3 + tx;      
-                fiy3             = fiy3 + ty;      
-                fiz3             = fiz3 + tz;      
-
-                /* Decrement j atom force */
-                fjx4             = fjx4 - tx;      
-                fjy4             = fjy4 - ty;      
-                fjz4             = fjz4 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv42;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx42;     
-                ty               = fscal*dy42;     
-                tz               = fscal*dz42;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+3]    = fjx2 - tx;      
-                faction[j3+4]    = fjy2 - ty;      
-                faction[j3+5]    = fjz2 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv43;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx43;     
-                ty               = fscal*dy43;     
-                tz               = fscal*dz43;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+6]    = fjx3 - tx;      
-                faction[j3+7]    = fjy3 - ty;      
-                faction[j3+8]    = fjz3 - tz;      
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vcoul            = qq*VV;          
-                fijC             = qq*FF;          
-                vctot            = vctot + vcoul;  
-                fscal            = -((fijC)*tabscale)*rinv44;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx44;     
-                ty               = fscal*dy44;     
-                tz               = fscal*dz44;     
-
-                /* Increment i atom force */
-                fix4             = fix4 + tx;      
-                fiy4             = fiy4 + ty;      
-                fiz4             = fiz4 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+9]    = fjx4 - tx;      
-                faction[j3+10]   = fjy4 - ty;      
-                faction[j3+11]   = fjz4 - tz;      
-
-                /* Inner loop uses 423 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            faction[ii3+3]   = faction[ii3+3] + fix2;
-            faction[ii3+4]   = faction[ii3+4] + fiy2;
-            faction[ii3+5]   = faction[ii3+5] + fiz2;
-            faction[ii3+6]   = faction[ii3+6] + fix3;
-            faction[ii3+7]   = faction[ii3+7] + fiy3;
-            faction[ii3+8]   = faction[ii3+8] + fiz3;
-            faction[ii3+9]   = faction[ii3+9] + fix4;
-            faction[ii3+10]  = faction[ii3+10] + fiy4;
-            faction[ii3+11]  = faction[ii3+11] + fiz4;
-            fshift[is3]      = fshift[is3]+fix1+fix2+fix3+fix4;
-            fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3+fiy4;
-            fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3+fiz4;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 38 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel334nf
- * Coulomb interaction:     Tabulated
- * VdW interaction:         Tabulated
- * water optimization:      pairs of TIP4P interactions
- * Calculate forces:        no
- */
-void nb_kernel334nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          qq,vcoul,vctot;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          ix1,iy1,iz1;
-    real          ix2,iy2,iz2;
-    real          ix3,iy3,iz3;
-    real          ix4,iy4,iz4;
-    real          jx1,jy1,jz1;
-    real          jx2,jy2,jz2;
-    real          jx3,jy3,jz3;
-    real          jx4,jy4,jz4;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          dx22,dy22,dz22,rsq22,rinv22;
-    real          dx23,dy23,dz23,rsq23,rinv23;
-    real          dx24,dy24,dz24,rsq24,rinv24;
-    real          dx32,dy32,dz32,rsq32,rinv32;
-    real          dx33,dy33,dz33,rsq33,rinv33;
-    real          dx34,dy34,dz34,rsq34,rinv34;
-    real          dx42,dy42,dz42,rsq42,rinv42;
-    real          dx43,dy43,dz43,rsq43,rinv43;
-    real          dx44,dy44,dz44,rsq44,rinv44;
-    real          qH,qM,qqMM,qqMH,qqHH;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-
-    /* Initialize water data */
-    ii               = iinr[0];        
-    qH               = charge[ii+1];   
-    qM               = charge[ii+3];   
-    qqMM             = facel*qM*qM;    
-    qqMH             = facel*qM*qH;    
-    qqHH             = facel*qH*qH;    
-    tj               = 2*(ntype+1)*type[ii];
-    c6               = vdwparam[tj];   
-    c12              = vdwparam[tj+1]; 
-
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-            ix2              = shX + pos[ii3+3];
-            iy2              = shY + pos[ii3+4];
-            iz2              = shZ + pos[ii3+5];
-            ix3              = shX + pos[ii3+6];
-            iy3              = shY + pos[ii3+7];
-            iz3              = shZ + pos[ii3+8];
-            ix4              = shX + pos[ii3+9];
-            iy4              = shY + pos[ii3+10];
-            iz4              = shZ + pos[ii3+11];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-                jx2              = pos[j3+3];      
-                jy2              = pos[j3+4];      
-                jz2              = pos[j3+5];      
-                jx3              = pos[j3+6];      
-                jy3              = pos[j3+7];      
-                jz3              = pos[j3+8];      
-                jx4              = pos[j3+9];      
-                jy4              = pos[j3+10];     
-                jz4              = pos[j3+11];     
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-                dx22             = ix2 - jx2;      
-                dy22             = iy2 - jy2;      
-                dz22             = iz2 - jz2;      
-                rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
-                dx23             = ix2 - jx3;      
-                dy23             = iy2 - jy3;      
-                dz23             = iz2 - jz3;      
-                rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
-                dx24             = ix2 - jx4;      
-                dy24             = iy2 - jy4;      
-                dz24             = iz2 - jz4;      
-                rsq24            = dx24*dx24+dy24*dy24+dz24*dz24;
-                dx32             = ix3 - jx2;      
-                dy32             = iy3 - jy2;      
-                dz32             = iz3 - jz2;      
-                rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
-                dx33             = ix3 - jx3;      
-                dy33             = iy3 - jy3;      
-                dz33             = iz3 - jz3;      
-                rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
-                dx34             = ix3 - jx4;      
-                dy34             = iy3 - jy4;      
-                dz34             = iz3 - jz4;      
-                rsq34            = dx34*dx34+dy34*dy34+dz34*dz34;
-                dx42             = ix4 - jx2;      
-                dy42             = iy4 - jy2;      
-                dz42             = iz4 - jz2;      
-                rsq42            = dx42*dx42+dy42*dy42+dz42*dz42;
-                dx43             = ix4 - jx3;      
-                dy43             = iy4 - jy3;      
-                dz43             = iz4 - jz3;      
-                rsq43            = dx43*dx43+dy43*dy43+dz43*dz43;
-                dx44             = ix4 - jx4;      
-                dy44             = iy4 - jy4;      
-                dz44             = iz4 - jz4;      
-                rsq44            = dx44*dx44+dy44*dy44+dz44*dz44;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-                rinv22           = gmx_invsqrt(rsq22);
-                rinv23           = gmx_invsqrt(rsq23);
-                rinv24           = gmx_invsqrt(rsq24);
-                rinv32           = gmx_invsqrt(rsq32);
-                rinv33           = gmx_invsqrt(rsq33);
-                rinv34           = gmx_invsqrt(rsq34);
-                rinv42           = gmx_invsqrt(rsq42);
-                rinv43           = gmx_invsqrt(rsq43);
-                rinv44           = gmx_invsqrt(rsq44);
-
-                /* Load parameters for j atom */
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated VdW interaction - dispersion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq22*rinv22;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq23*rinv23;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq24*rinv24;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq32*rinv32;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqHH;           
-
-                /* Calculate table index */
-                r                = rsq33*rinv33;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq34*rinv34;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq42*rinv42;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMH;           
-
-                /* Calculate table index */
-                r                = rsq43*rinv43;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Load parameters for j atom */
-                qq               = qqMM;           
-
-                /* Calculate table index */
-                r                = rsq44*rinv44;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 12*n0;          
-
-                /* Tabulated coulomb interaction */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vcoul            = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 258 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 14 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h

deleted file mode 100644 (file)

index 6f3dea0..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel334.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL334_H_
-#define _NBKERNEL334_H_
-
-/*! \file  nb_kernel334.h
- *  \brief Nonbonded kernel 334 (Tab Coul + Tab VdW, TIP4p-TIP4p)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 334 with forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel334
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 334 without forces.
- *
- *  <b>Coulomb interaction:</b> Tabulated <br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> TIP4p - TIP4p <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel334nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL334_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c

deleted file mode 100644 (file)

index fdc00b7..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel400.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel400
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel400(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;
-       scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent); 
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-       
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;   
-                       vgbtot           = 0;
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-               vgbtot           = vgbtot + vgb;
-                fscal            = -(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 49 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            gpol[ggid]       = gpol[ggid] + vgbtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 12 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-       
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel400nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Not calculated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel400nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Inner loop uses 29 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 5 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h

deleted file mode 100644 (file)

index 301f63e..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel400.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL400_H_
-#define _NBKERNEL400_H_
-
-/*! \file  nb_kernel400.h
- *  \brief Nonbonded kernel 400 (GB Coul)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 400 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel400
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 400 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> No <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel400nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL400_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c

deleted file mode 100644 (file)

index 3a4f624..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.c
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel410.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel410
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel410(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;     
-       scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent);  
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-   
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;  
-            vgbtot           = 0;
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-                vgbtot           = vgbtot + vgb;
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-                fscal            = (12.0*Vvdw12-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-                               
-                /* Inner loop uses 62 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            gpol[ggid]       = gpol[ggid] + vgbtot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-  
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel410nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Lennard-Jones
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel410nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Lennard-Jones interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                Vvdw12           = c12*rinvsix*rinvsix;
-                Vvdwtot          = Vvdwtot+Vvdw12-Vvdw6;
-
-                /* Inner loop uses 37 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h

deleted file mode 100644 (file)

index 24e3846..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel410.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL410_H_
-#define _NBKERNEL410_H_
-
-/*! \file  nb_kernel410.h
- *  \brief Nonbonded kernel 410 (GB Coul + LJ)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 410 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel410
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 410 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Lennard-Jones <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel410nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL410_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c

deleted file mode 100644 (file)

index b499eaf..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.c
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel420.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel420
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel420(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-            dvdasum          = 0;              
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-                fscal            = (br*Vvdwexp-6.0*Vvdw6)*rinvsq-(fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 88 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel420nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Buckingham
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel420nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          rinvsq;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          rinvsix;
-    real          Vvdw6,Vvdwtot;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          Vvdwexp,br;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,cexp1,cexp2;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 3*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+3*type[jnr];
-                c6               = vdwparam[tj];   
-                cexp1            = vdwparam[tj+1]; 
-                cexp2            = vdwparam[tj+2]; 
-                rinvsq           = rinv11*rinv11;  
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Buckingham interaction */
-                rinvsix          = rinvsq*rinvsq*rinvsq;
-                Vvdw6            = c6*rinvsix;     
-                br               = cexp2*rsq11*rinv11;
-                Vvdwexp          = cexp1*exp(-br); 
-                Vvdwtot          = Vvdwtot+Vvdwexp-Vvdw6;
-
-                /* Inner loop uses 64 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h

deleted file mode 100644 (file)

index b9529bf..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel420.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL420_H_
-#define _NBKERNEL420_H_
-
-/*! \file  nb_kernel420.h
- *  \brief Nonbonded kernel 420 (GB Coul + Bham)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 420 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel420
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-/*! \brief Nonbonded kernel 420 without forces.
- *
- *  \internal  Generated at compile time in either C or Fortran
- *  <b>VdW interaction:</b> Buckingham <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel420nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real *        work);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL420_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c

deleted file mode 100644 (file)

index 37c113a..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <math.h>
-
-#include "vec.h"
-#ifdef GMX_THREAD_SHM_FDECOMP
-#include "thread_mpi.h"
-#endif
-
-/* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
-
-#include "nb_kernel430.h"
-
-/*
- * Gromacs nonbonded kernel nb_kernel430
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        yes
- */
-void nb_kernel430(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          fscal,tx,ty,tz;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          FF;
-    real          fijC;
-    real          fijD,fijR;
-    real          isai,isaj,isaprod,gbscale,vgb,vgbtot;
-    real          dvdasum,dvdatmp,dvdaj,fgb;
-    real          ix1,iy1,iz1,fix1,fiy1,fiz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-       gmx_gbdata_t *gbdata;
-       real *        gpol;
-       real          scale_gb;
-       
-       gbdata           = (gmx_gbdata_t *)work;
-       gpol             = gbdata->gpol;
-       
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;   
-    scale_gb         = (1.0/gbdata->epsilon_r) - (1.0/gbdata->gb_epsilon_solvent);
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-            dvdasum          = 0;    
-           vgbtot           = 0;
-
-            /* Clear i atom forces */
-            fix1             = 0;              
-            fiy1             = 0;              
-            fiz1             = 0;              
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                fscal            = vcoul*rinv11;   
-                qq               = isaprod*(-qq)*scale_gb;  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Tabulated Generalized-Born interaction */
-                dvdaj            = dvda[jnr];      
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                vgb              = qq*VV;          
-                fijC             = qq*FF*gbscale;  
-                dvdatmp          = -0.5*(vgb+fijC*r);
-                dvdasum          = dvdasum + dvdatmp;
-                dvda[jnr]        = dvdaj+dvdatmp*isaj*isaj;
-                vctot            = vctot + vcoul;  
-               vgbtot           = vgbtot + vgb;
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw6            = c6*VV;          
-                fijD             = c6*FF;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                FF               = Fp+Geps+2.0*Heps2;
-                Vvdw12           = c12*VV;         
-                fijR             = c12*FF;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-                fscal            = -((fijD+fijR)*tabscale+fijC-fscal)*rinv11;
-
-                /* Calculate temporary vectorial force */
-                tx               = fscal*dx11;     
-                ty               = fscal*dy11;     
-                tz               = fscal*dz11;     
-
-                /* Increment i atom force */
-                fix1             = fix1 + tx;      
-                fiy1             = fiy1 + ty;      
-                fiz1             = fiz1 + tz;      
-
-                /* Decrement j atom force */
-                faction[j3+0]    = faction[j3+0] - tx;
-                faction[j3+1]    = faction[j3+1] - ty;
-                faction[j3+2]    = faction[j3+2] - tz;
-
-                /* Inner loop uses 80 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-            faction[ii3+0]   = faction[ii3+0] + fix1;
-            faction[ii3+1]   = faction[ii3+1] + fiy1;
-            faction[ii3+2]   = faction[ii3+2] + fiz1;
-            fshift[is3]      = fshift[is3]+fix1;
-            fshift[is3+1]    = fshift[is3+1]+fiy1;
-            fshift[is3+2]    = fshift[is3+2]+fiz1;
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-                       gpol[ggid]       = gpol[ggid] + vgbtot;
-            dvda[ii]         = dvda[ii] + dvdasum*isai*isai;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 13 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
-
-
-
-/*
- * Gromacs nonbonded kernel nb_kernel430nf
- * Coulomb interaction:     Generalized-Born
- * VdW interaction:         Tabulated
- * water optimization:      No
- * Calculate forces:        no
- */
-void nb_kernel430nf(
-                    int *           p_nri,
-                    int *           iinr,
-                    int *           jindex,
-                    int *           jjnr,
-                    int *           shift,
-                    real *          shiftvec,
-                    real *          fshift,
-                    int *           gid,
-                    real *          pos,
-                    real *          faction,
-                    real *          charge,
-                    real *          p_facel,
-                    real *          p_krf,
-                    real *          p_crf,
-                    real *          Vc,
-                    int *           type,
-                    int *           p_ntype,
-                    real *          vdwparam,
-                    real *          Vvdw,
-                    real *          p_tabscale,
-                    real *          VFtab,
-                    real *          invsqrta,
-                    real *          dvda,
-                    real *          p_gbtabscale,
-                    real *          GBtab,
-                    int *           p_nthreads,
-                    int *           count,
-                    void *          mtx,
-                    int *           outeriter,
-                    int *           inneriter,
-                    real *          work)
-{
-    int           nri,ntype,nthreads;
-    real          facel,krf,crf,tabscale,gbtabscale;
-    int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
-    int           nn0,nn1,nouter,ninner;
-    real          shX,shY,shZ;
-    real          iq;
-    real          qq,vcoul,vctot;
-    int           nti;
-    int           tj;
-    real          Vvdw6,Vvdwtot;
-    real          Vvdw12;
-    real          r,rt,eps,eps2;
-    int           n0,nnn;
-    real          Y,F,Geps,Heps2,Fp,VV;
-    real          isai,isaj,isaprod,gbscale,vgb;
-    real          ix1,iy1,iz1;
-    real          jx1,jy1,jz1;
-    real          dx11,dy11,dz11,rsq11,rinv11;
-    real          c6,c12;
-
-    nri              = *p_nri;         
-    ntype            = *p_ntype;       
-    nthreads         = *p_nthreads;    
-    facel            = *p_facel;       
-    krf              = *p_krf;         
-    crf              = *p_crf;         
-    tabscale         = *p_tabscale;    
-    gbtabscale       = *p_gbtabscale;  
-
-    /* Reset outer and inner iteration counters */
-    nouter           = 0;              
-    ninner           = 0;              
-
-    /* Loop over thread workunits */
-    
-    do
-    {
-#ifdef GMX_THREAD_SHM_FDECOMP
-        tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
-        nn0              = *count;         
-               
-        /* Take successively smaller chunks (at least 10 lists) */
-        nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
-        *count           = nn1;            
-        tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
-        if(nn1>nri) nn1=nri;
-#else
-           nn0 = 0;
-               nn1 = nri;
-#endif
-        /* Start outer loop over neighborlists */
-        
-        for(n=nn0; (n<nn1); n++)
-        {
-
-            /* Load shift vector for this list */
-            is3              = 3*shift[n];     
-            shX              = shiftvec[is3];  
-            shY              = shiftvec[is3+1];
-            shZ              = shiftvec[is3+2];
-
-            /* Load limits for loop over neighbors */
-            nj0              = jindex[n];      
-            nj1              = jindex[n+1];    
-
-            /* Get outer coordinate index */
-            ii               = iinr[n];        
-            ii3              = 3*ii;           
-
-            /* Load i atom data, add shift vector */
-            ix1              = shX + pos[ii3+0];
-            iy1              = shY + pos[ii3+1];
-            iz1              = shZ + pos[ii3+2];
-
-            /* Load parameters for i atom */
-            iq               = facel*charge[ii];
-            isai             = invsqrta[ii];   
-            nti              = 2*ntype*type[ii];
-
-            /* Zero the potential energy for this list */
-            vctot            = 0;              
-            Vvdwtot          = 0;              
-
-            /* Clear i atom forces */
-            
-            for(k=nj0; (k<nj1); k++)
-            {
-
-                /* Get j neighbor index, and coordinate index */
-                jnr              = jjnr[k];        
-                j3               = 3*jnr;          
-
-                /* load j atom coordinates */
-                jx1              = pos[j3+0];      
-                jy1              = pos[j3+1];      
-                jz1              = pos[j3+2];      
-
-                /* Calculate distance */
-                dx11             = ix1 - jx1;      
-                dy11             = iy1 - jy1;      
-                dz11             = iz1 - jz1;      
-                rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
-
-                /* Calculate 1/r and 1/r2 */
-                rinv11           = gmx_invsqrt(rsq11);
-
-                /* Load parameters for j atom */
-                isaj             = invsqrta[jnr];  
-                isaprod          = isai*isaj;      
-                qq               = iq*charge[jnr]; 
-                vcoul            = qq*rinv11;      
-                qq               = isaprod*(-qq);  
-                gbscale          = isaprod*gbtabscale;
-                tj               = nti+2*type[jnr];
-                c6               = vdwparam[tj];   
-                c12              = vdwparam[tj+1]; 
-
-                /* Tabulated Generalized-Born interaction */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*gbscale;      
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 4*n0;           
-                Y                = GBtab[nnn];     
-                F                = GBtab[nnn+1];   
-                Geps             = eps*GBtab[nnn+2];
-                Heps2            = eps2*GBtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                vgb              = qq*VV;          
-                vctot            = vctot + vcoul;  
-
-                /* Calculate table index */
-                r                = rsq11*rinv11;   
-
-                /* Calculate table index */
-                rt               = r*tabscale;     
-                n0               = rt;             
-                eps              = rt-n0;          
-                eps2             = eps*eps;        
-                nnn              = 8*n0;           
-
-                /* Tabulated VdW interaction - dispersion */
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw6            = c6*VV;          
-
-                /* Tabulated VdW interaction - repulsion */
-                nnn              = nnn+4;          
-                Y                = VFtab[nnn];     
-                F                = VFtab[nnn+1];   
-                Geps             = eps*VFtab[nnn+2];
-                Heps2            = eps2*VFtab[nnn+3];
-                Fp               = F+Geps+Heps2;   
-                VV               = Y+eps*Fp;       
-                Vvdw12           = c12*VV;         
-                Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
-
-                /* Inner loop uses 49 flops/iteration */
-            }
-            
-
-            /* Add i forces to mem and shifted force list */
-
-            /* Add potential energies to the group for this list */
-            ggid             = gid[n];         
-            Vc[ggid]         = Vc[ggid] + vctot;
-            Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
-
-            /* Increment number of inner iterations */
-            ninner           = ninner + nj1 - nj0;
-
-            /* Outer loop uses 6 flops/iteration */
-        }
-        
-
-        /* Increment number of outer iterations */
-        nouter           = nouter + nn1 - nn0;
-    }
-    while (nn1<nri);
-    
-
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = nouter;         
-    *inneriter       = ninner;         
-}
-
-
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h

deleted file mode 100644 (file)

index fec215b..0000000
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel430.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *                This source code is part of
- *
- *                 G   R   O   M   A   C   S
- *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
- *
- * Gromacs is a library for molecular simulation and trajectory analysis,
- * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
- * a full list of developers and information, check out http://www.gromacs.org
- *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
- * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
- *
- * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL430_H_
-#define _NBKERNEL430_H_
-
-/*! \file  nb_kernel430.h
- *  \brief Nonbonded kernel 430 (GB Coul + Tab VdW)
- *
- *  \internal
- */
-
-#include "types/simple.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-}
-#endif
-
-
-/*! \brief Nonbonded kernel 430 with forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> Yes <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel430
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-/*! \brief Nonbonded kernel 430 without forces.
- *
- *  <b>Coulomb interaction:</b> Generalized Born<br>
- *  <b>VdW interaction:</b> Tabulated  <br>
- *  <b>Water optimization:</b> No <br>
- *  <b>Forces calculated:</b> No <br>
- *
- *  \note All level1 and level2 nonbonded kernels use the same
- *        call sequence. Parameters are documented in nb_kernel.h
- */
-void
-nb_kernel430nf
-                (int *         nri,        int           iinr[],     
-                 int           jindex[],   int           jjnr[],   
-                 int           shift[],    real          shiftvec[],
-                 real          fshift[],   int           gid[], 
-                 real          pos[],      real          faction[],
-                 real          charge[],   real *        facel,
-                 real *        krf,        real *        crf,  
-                 real          Vc[],       int           type[],   
-                 int *         ntype,      real          vdwparam[],
-                 real          Vvdw[],     real *        tabscale,
-                 real          VFtab[],    real          invsqrta[], 
-                 real          dvda[],     real *        gbtabscale,
-                 real          GBtab[],    int *         nthreads, 
-                 int *         count,      void *        mtx,
-                 int *         outeriter,  int *         inneriter,
-                 real          work[]);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _NBKERNEL430_H_ */
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..1a01086
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,438 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*81);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 74 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*74);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..19788cc
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3P1_c.c
@@ -0,0 +1,686 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 165 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*165);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 150 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*150);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..3eda3ba
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW3W3_c.c
@@ -0,0 +1,1248 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 408 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*408);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*369);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..3cc3b21
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4P1_c.c
@@ -0,0 +1,772 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 187 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*187);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 172 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*172);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..2b5f528
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwBham_GeomW4W4_c.c
@@ -0,0 +1,1342 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 430 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*430);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 391 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*391);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..cbd63bd
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,462 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 73 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*73);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 61 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*61);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..fc4d13a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_c.c
@@ -0,0 +1,710 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 157 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*157);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..42b8b57
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_c.c
@@ -0,0 +1,1272 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 400 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*400);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 356 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*356);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..8a9584e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_c.c
@@ -0,0 +1,808 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 181 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*181);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 161 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*161);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..24fcfde
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_c.c
@@ -0,0 +1,1378 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 424 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*424);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 4;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 3*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 380 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*380);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..7e32a82
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,434 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 55 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*55);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 46 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*46);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..5aad06d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_c.c
@@ -0,0 +1,682 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*139);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 122 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*122);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..29d1d79
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_c.c
@@ -0,0 +1,1244 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 382 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*382);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 341 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*341);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..bbaf3be
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_c.c
@@ -0,0 +1,762 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 158 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*158);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 141 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*141);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..8e939d2
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_c.c
@@ -0,0 +1,1332 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 401 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*401);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*360);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..9366948
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,392 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 42 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*42);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 38 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*38);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..523f481
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,640 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 114 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*114);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..90b1757
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,1202 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq01*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq02*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq00*FF*vftabscale*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r01*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq01*FF*vftabscale*rinv01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r02*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq02*FF*vftabscale*rinv02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 333 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*333);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..a661b19
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,640 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq10*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq20*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq30*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r10*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq10*FF*vftabscale*rinv10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r20*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq20*FF*vftabscale*rinv20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r30*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq30*FF*vftabscale*rinv30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 114 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*114);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..02fbd64
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,1202 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq11*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq12*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq13*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq21*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq22*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq23*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq31*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq32*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            velec            = qq33*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: CubicSplineTable
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r11*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq11*FF*vftabscale*rinv11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r12*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq12*FF*vftabscale*rinv12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r13*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq13*FF*vftabscale*rinv13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r21*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq21*FF*vftabscale*rinv21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r22*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq22*FF*vftabscale*rinv22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r23*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq23*FF*vftabscale*rinv23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r31*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq31*FF*vftabscale*rinv31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r32*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq32*FF*vftabscale*rinv32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r33*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 1*4*vfitab;
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq33*FF*vftabscale*rinv33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 333 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*333);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..f7be552
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,402 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 67 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*67);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..32bdada
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3P1_c.c
@@ -0,0 +1,598 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..4fe9979
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW3W3_c.c
@@ -0,0 +1,1004 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 282 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*282);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..03af0bb
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4P1_c.c
@@ -0,0 +1,682 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 145 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*145);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..8274b81
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwBham_GeomW4W4_c.c
@@ -0,0 +1,1096 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*304);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..f6a33ac
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,454 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 62 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*62);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..300dc84
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_c.c
@@ -0,0 +1,650 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 118 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*118);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..4023e7a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c
@@ -0,0 +1,1056 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 277 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*277);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..38fe3d3
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_c.c
@@ -0,0 +1,732 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*139);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..c1ddbc0
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c
@@ -0,0 +1,1146 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 298 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*298);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..f256bcf
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,394 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 40 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*40);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..a04f56e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_c.c
@@ -0,0 +1,590 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..fdbe63f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_c.c
@@ -0,0 +1,996 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 255 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*255);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..a51c687
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_c.c
@@ -0,0 +1,672 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 116 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*116);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..b78968c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_c.c
@@ -0,0 +1,1086 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 275 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*275);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..36bef38
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,356 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 28 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*28);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..892270f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,552 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 84 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*84);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..81b1831
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,958 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 243 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*243);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq00*rinv00;
+            felec            = velec*rinvsq00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq01*rinv01;
+            felec            = velec*rinvsq01;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq02*rinv02;
+            felec            = velec*rinvsq02;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..ebb7ac0
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,552 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 84 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*84);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq10*rinv10;
+            felec            = velec*rinvsq10;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq20*rinv20;
+            felec            = velec*rinvsq20;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq30*rinv30;
+            felec            = velec*rinvsq30;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..cb255fc
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecCoul_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,958 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 243 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*243);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Coulomb
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq11*rinv11;
+            felec            = velec*rinvsq11;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq12*rinv12;
+            felec            = velec*rinvsq12;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq13*rinv13;
+            felec            = velec*rinvsq13;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq21*rinv21;
+            felec            = velec*rinvsq21;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq22*rinv22;
+            felec            = velec*rinvsq22;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq23*rinv23;
+            felec            = velec*rinvsq23;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq31*rinv31;
+            felec            = velec*rinvsq31;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq32*rinv32;
+            felec            = velec*rinvsq32;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq33*rinv33;
+            felec            = velec*rinvsq33;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..16f6b23
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_c.c
@@ -0,0 +1,454 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 111 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*111);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 69 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*69);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..956f68d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_c.c
@@ -0,0 +1,702 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 195 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*195);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..249d2b8
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_c.c
@@ -0,0 +1,1264 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 438 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*438);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*332);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..c3619b7
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_c.c
@@ -0,0 +1,800 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 218 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*218);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 160 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*160);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..bfe65ef
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_c.c
@@ -0,0 +1,1370 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 461 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*461);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 355 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*355);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..88c102f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_c.c
@@ -0,0 +1,450 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*59);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*41);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..6cd740f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_c.c
@@ -0,0 +1,698 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 143 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*143);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 109 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*109);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..9cdb942
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_c.c
@@ -0,0 +1,1260 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 386 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*386);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*304);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..3e87257
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_c.c
@@ -0,0 +1,790 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 163 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*163);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 129 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*129);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..2cb43bd
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_c.c
@@ -0,0 +1,1360 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 406 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*406);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 324 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*324);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..664442f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,406 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 42 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*42);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..ab8259c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..ff0122b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,1216 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*((rinv00-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*((rinv01-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*((rinv02-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..e760e22
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*((rinv10-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*((rinv20-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*((rinv30-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 126 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*126);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..6565b71
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,1216 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*((rinv11-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*((rinv12-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*((rinv13-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*((rinv21-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*((rinv22-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*((rinv23-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*((rinv31-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*((rinv32-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*((rinv33-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 369 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*369);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..fe8ab28
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_c.c
@@ -0,0 +1,497 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 101 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*101);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 97 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*97);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..0eca1c6
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_c.c
@@ -0,0 +1,791 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 219 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*219);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 211 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*211);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..e0002e0
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_c.c
@@ -0,0 +1,1491 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 564 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*564);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 544 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*544);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..76c59f2
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_c.c
@@ -0,0 +1,907 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 256 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*256);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 248 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*248);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..743fb40
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_c.c
@@ -0,0 +1,1615 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 601 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*601);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 581 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*581);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..07159bf
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_c.c
@@ -0,0 +1,495 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 75 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*75);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*71);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..b49875a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_c.c
@@ -0,0 +1,789 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 193 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*193);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 185 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*185);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..3634b5d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_c.c
@@ -0,0 +1,1489 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            vvdw            *= sw;
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 538 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*538);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 518 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*518);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..333cf6c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_c.c
@@ -0,0 +1,905 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 230 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*230);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 222 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*222);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..0872f4d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_c.c
@@ -0,0 +1,1613 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 575 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*575);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 555 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*555);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..f685701
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,451 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*59);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 57 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*57);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..7f6e7b5
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,745 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 177 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*177);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 171 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*171);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..f6f0179
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,1445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 522 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*522);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            d                = r01-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            d                = r02-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 504 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*504);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..48f7d36
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,745 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 177 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*177);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            d                = r10-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            d                = r20-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            d                = r30-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 171 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*171);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..6039b2e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,1445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            velec           *= sw;
+            felec            = felec*sw + velec*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 522 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*522);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rcoulomb_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            d                = r11-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            d                = r12-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            d                = r13-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            d                = r21-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            d                = r22-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            d                = r23-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            d                = r31-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            d                = r32-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            d                = r33-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            felec            = felec*sw + velec*dsw;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 504 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*504);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..a04671a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,430 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 79 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*79);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 69 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*69);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..92aef65
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3P1_c.c
@@ -0,0 +1,658 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 161 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*161);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 137 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*137);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..984892f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW3W3_c.c
@@ -0,0 +1,1160 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 398 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*398);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*332);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..ff5f280
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4P1_c.c
@@ -0,0 +1,746 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 184 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*184);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 160 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*160);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..1757d44
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwBham_GeomW4W4_c.c
@@ -0,0 +1,1256 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 421 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*421);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 355 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*355);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..679e890
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,482 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 74 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*74);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 59 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*59);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..fb05c2c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_c.c
@@ -0,0 +1,710 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 156 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*156);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 127 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*127);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..a71be52
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_c.c
@@ -0,0 +1,1212 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 393 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*393);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 322 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*322);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..f691cfc
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_c.c
@@ -0,0 +1,796 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 178 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*178);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 149 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*149);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..ade4412
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_c.c
@@ -0,0 +1,1306 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 415 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*415);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 344 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*344);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..83beeaf
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,426 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*53);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*41);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..053a2f3
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3P1_c.c
@@ -0,0 +1,654 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 135 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*135);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 109 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*109);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..0697581
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW3W3_c.c
@@ -0,0 +1,1156 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 372 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*372);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 304 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*304);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..3c912ed
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4P1_c.c
@@ -0,0 +1,736 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 155 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*155);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 129 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*129);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..0079058
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwLJ_GeomW4W4_c.c
@@ -0,0 +1,1246 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 392 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*392);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 324 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*324);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..dcb5653
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,388 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 41 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*41);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..161a335
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,616 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..3e9e4ec
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,1118 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq00*(rinv00-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq01*(rinv01-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq02*(rinv02-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*360);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r00*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq00*rinv00*(rinvsq00-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r01              = rsq01*rinv01;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r01*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq01*rinv01*(rinvsq01-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r02              = rsq02*rinv02;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r02*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq02*rinv02*(rinvsq02-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..c690477
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,616 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq10*(rinv10-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq20*(rinv20-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq30*(rinv30-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 123 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*123);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r10              = rsq10*rinv10;
+
+            qq10             = iq1*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r10*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq10*rinv10*(rinvsq10-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r20              = rsq20*rinv20;
+
+            qq20             = iq2*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r20*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq20*rinv20*(rinvsq20-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r30              = rsq30*rinv30;
+
+            qq30             = iq3*jq0;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r30*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq30*rinv30*(rinvsq30-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*102);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..035931c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecEw_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,1118 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq11*(rinv11-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq12*(rinv12-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq13*(rinv13-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq21*(rinv21-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq22*(rinv22-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq23*(rinv23-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq31*(rinv31-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq32*(rinv32-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            velec            = qq33*(rinv33-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 360 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*360);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: Ewald
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    sh_ewald         = fr->ic->sh_ewald;
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r11              = rsq11*rinv11;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r11*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq11*rinv11*(rinvsq11-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r12              = rsq12*rinv12;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r12*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq12*rinv12*(rinvsq12-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r13              = rsq13*rinv13;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r13*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq13*rinv13*(rinvsq13-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r21              = rsq21*rinv21;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r21*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq21*rinv21*(rinvsq21-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r22              = rsq22*rinv22;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r22*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq22*rinv22*(rinvsq22-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r23              = rsq23*rinv23;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r23*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq23*rinv23*(rinvsq23-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r31              = rsq31*rinv31;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r31*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq31*rinv31*(rinvsq31-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r32              = rsq32*rinv32;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r32*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq32*rinv32*(rinvsq32-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r33              = rsq33*rinv33;
+
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r33*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq33*rinv33*(rinvsq33-felec);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 297 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*297);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..92b5b6b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,491 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 97 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*97);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 92 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*92);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..e436b3e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,533 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 91 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*91);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..1d61f4f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,487 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        vvdwsum          = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 16 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*16 + inneriter*71);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 64 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*64);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..0ef6e30
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecGB_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,445 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vgbsum           = 0.0;
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vgbsum          += vgb;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*15 + inneriter*58);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: GeneralizedBorn
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        isai0            = invsqrta[inr+0];
+
+        dvdasum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            isaj0           = invsqrta[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai0*isaj0;
+            gbqqfactor       = isaprod*(-qq00)*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+0];
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r00*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r00);
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj0*isaj0;
+            velec            = qq00*rinv00;
+            felec            = (velec*rinv00-fgb)*rinv00;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 56 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        dvda[nri]                   = dvda[nri] + dvdasum*isai0*isai0;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*56);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..97ce578
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_c.c
@@ -0,0 +1,399 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 92 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*92);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*58);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..d5c66d2
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_c.c
@@ -0,0 +1,437 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 79 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*79);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 77 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*77);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..434429e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,377 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 61 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*61);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 58 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*58);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..b841a2b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,425 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 55 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*55);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 47 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*47);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..d5b5b1d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_c.c
@@ -0,0 +1,387 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 37 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*37);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..0fc2732
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_c.c
@@ -0,0 +1,435 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*53);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    rcutoff          = fr->rvdw;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 51 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*51);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..2e75940
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecNone_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,365 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*13 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: None
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinvsq00         = 1.0/rsq00;
+
+            /* Load parameters for j particles */
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 12 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW_F,outeriter*12 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..742e123
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_c.c
@@ -0,0 +1,431 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 102 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*102);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..90d7032
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_c.c
@@ -0,0 +1,645 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 166 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*166);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..83d23be
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_c.c
@@ -0,0 +1,1105 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 349 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*349);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..8235742
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_c.c
@@ -0,0 +1,739 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 188 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*188);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..b93632a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_c.c
@@ -0,0 +1,1207 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = (vvdwexp-cexp1_00*exp(-cexp2_00*rvdw)) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 371 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*371);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..d0f7070
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_c.c
@@ -0,0 +1,469 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 89 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*89);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 82 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*82);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..d818006
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_c.c
@@ -0,0 +1,683 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 153 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*153);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 136 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*136);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..891995b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_c.c
@@ -0,0 +1,1143 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 336 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*336);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 289 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*289);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..bee93e5
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_c.c
@@ -0,0 +1,777 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 175 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*175);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 158 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*158);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..a5d8511
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_c.c
@@ -0,0 +1,1245 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 358 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*358);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 311 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*311);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..8d00b31
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,477 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 66 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*66);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..1e55e56
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_c.c
@@ -0,0 +1,691 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 130 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*130);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..1a1ef76
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_c.c
@@ -0,0 +1,1151 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 313 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*313);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..967a003
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_c.c
@@ -0,0 +1,773 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 151 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*151);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..3c6532a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_c.c
@@ -0,0 +1,1241 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 334 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*334);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..396f264
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_c.c
@@ -0,0 +1,423 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 49 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*49);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..41e87fd
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_c.c
@@ -0,0 +1,637 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 113 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*113);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..c7d255b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_c.c
@@ -0,0 +1,1097 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 296 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*296);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..3a290c4
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_c.c
@@ -0,0 +1,729 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 133 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*133);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..7d23b9f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_c.c
@@ -0,0 +1,1197 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = (vvdw12 - c12_00*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_00*sh_vdw_invrcut6)*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 316 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*316);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..fbbf0bf
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_c.c
@@ -0,0 +1,467 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*63);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 56 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*56);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..653589a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_c.c
@@ -0,0 +1,681 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 127 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*127);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 110 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*110);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..09f9e6e
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_c.c
@@ -0,0 +1,1141 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 310 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*310);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 263 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*263);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..ad6d1b6
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_c.c
@@ -0,0 +1,775 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 149 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*149);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 132 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*132);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..3262295
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_c.c
@@ -0,0 +1,1243 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            vvdw            *= sw;
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 332 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*332);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    rswitch          = fr->rvdw_switch;
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            r00              = rsq00*rinv00;
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            d                = r00-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+
+            /* Evaluate switch function */
+            fvdw             = fvdw*sw + vvdw*dsw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 285 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*285);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..b200f3a
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,379 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..c53d964
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..8c8af55
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq00<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq01<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq02<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..e8a4b2c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq10<rcutoff2)
+            {
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq20<rcutoff2)
+            {
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq30<rcutoff2)
+            {
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..4f6c8d7
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    rcutoff2         = rcutoff*rcutoff;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq11<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq12<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq13<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq21<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq22<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq23<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq31<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq32<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            }
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            if (rsq33<rcutoff2)
+            {
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            }
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..3af1e9d
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomP1P1_c.c
@@ -0,0 +1,407 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 71 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*71);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 63 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*63);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..e59218c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3P1_c.c
@@ -0,0 +1,601 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 135 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*135);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 117 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*117);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..848ef5c
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW3W3_c.c
@@ -0,0 +1,1001 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 318 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*318);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 270 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*270);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..c45c408
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4P1_c.c
@@ -0,0 +1,685 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 157 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*157);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 3*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+            cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 139 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*139);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..4ec58cc
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwBham_GeomW4W4_c.c
@@ -0,0 +1,1093 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 340 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*340);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            Buckingham
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 3*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 3*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    cexp1_00         = vdwparam[vdwioffset0+vdwjidx0+1];
+    cexp2_00         = vdwparam[vdwioffset0+vdwjidx0+2];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            br               = cexp2_00*r00;
+            vvdwexp          = cexp1_00*exp(-br);
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 292 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*292);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..6c071ee
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_c.c
@@ -0,0 +1,459 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 66 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*66);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 53 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*53);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..24a3000
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_c.c
@@ -0,0 +1,653 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 130 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*130);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 107 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*107);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..84a90f5
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_c.c
@@ -0,0 +1,1053 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 313 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*313);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 260 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..2184f12
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_c.c
@@ -0,0 +1,735 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 151 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*151);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*128);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..ca01432
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_c.c
@@ -0,0 +1,1143 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_00*VV;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            vvdw             = vvdw12+vvdw6;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 334 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*334);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            CubicSplineTable
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            r00              = rsq00*rinv00;
+
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r00*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = 2*4*vfitab;
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += 0;
+            Y                = vftab[vfitab];
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_00*FF;
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            Y                = vftab[vfitab+4];
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_00*FF;
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 281 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..ca7e9b8
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomP1P1_c.c
@@ -0,0 +1,399 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 44 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 15 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*15 + inneriter*44);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+        vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 34 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_F,outeriter*13 + inneriter*34);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..90fb09b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3P1_c.c
@@ -0,0 +1,593 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*32 + inneriter*108);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 88 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_F,outeriter*30 + inneriter*88);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..0e3f3d1
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW3W3_c.c
@@ -0,0 +1,993 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+            vvdwsum         += vvdw;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 291 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 32 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*291);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    qq00             = iq0*jq0;
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = felec+fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 241 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*241);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..6cd4719
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4P1_c.c
@@ -0,0 +1,675 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 128 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*41 + inneriter*128);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+            vdwjidx0         = 2*vdwtype[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+            c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 108 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_F,outeriter*39 + inneriter*108);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..5c90acb
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwLJ_GeomW4W4_c.c
@@ -0,0 +1,1083 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+        vvdwsum          = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            vvdw6            = c6_00*rinvsix;
+            vvdw12           = c12_00*rinvsix*rinvsix;
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            fvdw             = (vvdw12-vvdw6)*rinvsq00;
+
+            /* Update potential sums from outer loop */
+            vvdwsum         += vvdw;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 311 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 41 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*311);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            LennardJones
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+    vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    vdwjidx0         = 2*vdwtype[inr+0];
+    c6_00            = vdwparam[vdwioffset0+vdwjidx0];
+    c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq00         = 1.0/rsq00;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq00*rinvsq00*rinvsq00;
+            fvdw             = (c12_00*rinvsix-c6_00)*rinvsix*rinvsq00;
+
+            fscal            = fvdw;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 261 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 39 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*261);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c

new file mode 100644 (file)

index 0000000..2f74aea
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomP1P1_c.c
@@ -0,0 +1,361 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 32 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 14 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*14 + inneriter*32);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Particle-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+
+        /* Load parameters for i particles */
+        iq0              = facel*charge[inr+0];
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+
+            rinv00           = gmx_invsqrt(rsq00);
+
+            rinvsq00         = rinv00*rinv00;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 27 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 13 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_F,outeriter*13 + inneriter*27);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c

new file mode 100644 (file)

index 0000000..ea3ce8b
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3P1_c.c
@@ -0,0 +1,555 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq00             = iq0*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c

new file mode 100644 (file)

index 0000000..d8ae0cc
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW3W3_c.c
@@ -0,0 +1,955 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq00*(rinv00+krf*rsq00-crf);
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq01*(rinv01+krf*rsq01-crf);
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq02*(rinv02+krf*rsq02-crf);
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water3-Water3
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset0;
+    real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
+    real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
+    real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq0              = facel*charge[inr+0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+
+    jq0              = charge[inr+0];
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    qq00             = iq0*jq0;
+    qq01             = iq0*jq1;
+    qq02             = iq0*jq2;
+    qq10             = iq1*jq0;
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq20             = iq2*jq0;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix0              = shX + x[i_coord_offset+DIM*0+XX];
+        iy0              = shY + x[i_coord_offset+DIM*0+YY];
+        iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+
+        fix0             = 0.0;
+        fiy0             = 0.0;
+        fiz0             = 0.0;
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+
+            /* Calculate displacement vector */
+            dx00             = ix0 - jx0;
+            dy00             = iy0 - jy0;
+            dz00             = iz0 - jz0;
+            dx01             = ix0 - jx1;
+            dy01             = iy0 - jy1;
+            dz01             = iz0 - jz1;
+            dx02             = ix0 - jx2;
+            dy02             = iy0 - jy2;
+            dz02             = iz0 - jz2;
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+
+            /* Calculate squared distance and things based on it */
+            rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
+            rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
+            rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+
+            rinv00           = gmx_invsqrt(rsq00);
+            rinv01           = gmx_invsqrt(rsq01);
+            rinv02           = gmx_invsqrt(rsq02);
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+
+            rinvsq00         = rinv00*rinv00;
+            rinvsq01         = rinv01*rinv01;
+            rinvsq02         = rinv02*rinv02;
+            rinvsq10         = rinv10*rinv10;
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq00*(rinv00*rinvsq00-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx00;
+            ty               = fscal*dy00;
+            tz               = fscal*dz00;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq01*(rinv01*rinvsq01-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx01;
+            ty               = fscal*dy01;
+            tz               = fscal*dz01;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq02*(rinv02*rinvsq02-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx02;
+            ty               = fscal*dy02;
+            tz               = fscal*dz02;
+
+            /* Update vectorial force */
+            fix0            += tx;
+            fiy0            += ty;
+            fiz0            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*0+XX] += fix0;
+        f[i_coord_offset+DIM*0+YY] += fiy0;
+        f[i_coord_offset+DIM*0+ZZ] += fiz0;
+        tx                         += fix0;
+        ty                         += fiy0;
+        tz                         += fiz0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c

new file mode 100644 (file)

index 0000000..9208f9f
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4P1_c.c
@@ -0,0 +1,555 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq10*(rinv10+krf*rsq10-crf);
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq20*(rinv20+krf*rsq20-crf);
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq30*(rinv30+krf*rsq30-crf);
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 96 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*31 + inneriter*96);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Particle
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx0;
+    real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
+    real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
+    real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
+    real             dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30,cexp1_30,cexp2_30;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx0              = x[j_coord_offset+DIM*0+XX];
+            jy0              = x[j_coord_offset+DIM*0+YY];
+            jz0              = x[j_coord_offset+DIM*0+ZZ];
+
+            /* Calculate displacement vector */
+            dx10             = ix1 - jx0;
+            dy10             = iy1 - jy0;
+            dz10             = iz1 - jz0;
+            dx20             = ix2 - jx0;
+            dy20             = iy2 - jy0;
+            dz20             = iz2 - jz0;
+            dx30             = ix3 - jx0;
+            dy30             = iy3 - jy0;
+            dz30             = iz3 - jz0;
+
+            /* Calculate squared distance and things based on it */
+            rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
+            rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
+            rsq30            = dx30*dx30+dy30*dy30+dz30*dz30;
+
+            rinv10           = gmx_invsqrt(rsq10);
+            rinv20           = gmx_invsqrt(rsq20);
+            rinv30           = gmx_invsqrt(rsq30);
+
+            rinvsq10         = rinv10*rinv10;
+            rinvsq20         = rinv20*rinv20;
+            rinvsq30         = rinv30*rinv30;
+
+            /* Load parameters for j particles */
+            jq0              = charge[jnr+0];
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq10             = iq1*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq10*(rinv10*rinvsq10-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx10;
+            ty               = fscal*dy10;
+            tz               = fscal*dz10;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq20             = iq2*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq20*(rinv20*rinvsq20-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx20;
+            ty               = fscal*dy20;
+            tz               = fscal*dz20;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            qq30             = iq3*jq0;
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq30*(rinv30*rinvsq30-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx30;
+            ty               = fscal*dy30;
+            tz               = fscal*dz30;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*0+XX] -= tx;
+            f[j_coord_offset+DIM*0+YY] -= ty;
+            f[j_coord_offset+DIM*0+ZZ] -= tz;
+
+            /* Inner loop uses 81 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_F,outeriter*30 + inneriter*81);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c

new file mode 100644 (file)

index 0000000..0a198f8
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_ElecRF_VdwNone_GeomW4W4_c.c
@@ -0,0 +1,955 @@
+/*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
+ *                This source code is part of
+ *
+ *                 G   R   O   M   A   C   S
+ *
+ * Copyright (c) 2001-2012, The GROMACS Development Team
+ *
+ * Gromacs is a library for molecular simulation and trajectory analysis,
+ * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
+ * a full list of developers and information, check out http://www.gromacs.org
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
+ * later version.
+ *
+ * To help fund GROMACS development, we humbly ask that you cite
+ * the papers people have written on it - you can find them on the website.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        PotentialAndForce
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Reset potential sums */
+        velecsum         = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq11*(rinv11+krf*rsq11-crf);
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq12*(rinv12+krf*rsq12-crf);
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq13*(rinv13+krf*rsq13-crf);
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq21*(rinv21+krf*rsq21-crf);
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq22*(rinv22+krf*rsq22-crf);
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq23*(rinv23+krf*rsq23-crf);
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq31*(rinv31+krf*rsq31-crf);
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq32*(rinv32+krf*rsq32-crf);
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            velec            = qq33*(rinv33+krf*rsq33-crf);
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            /* Update potential sums from outer loop */
+            velecsum        += velec;
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 279 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 31 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*31 + inneriter*279);
+}
+/*
+ * Gromacs nonbonded kernel:   nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c
+ * Electrostatics interaction: ReactionField
+ * VdW interaction:            None
+ * Geometry:                   Water4-Water4
+ * Calculate force/pot:        Force
+ */
+void
+nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    int              vdwioffset1;
+    real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
+    int              vdwioffset2;
+    real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
+    int              vdwioffset3;
+    real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
+    int              vdwjidx1;
+    real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
+    int              vdwjidx2;
+    real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
+    int              vdwjidx3;
+    real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
+    real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
+    real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
+    real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
+    real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
+    real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
+    real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
+    real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
+    real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
+    real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    iq1              = facel*charge[inr+1];
+    iq2              = facel*charge[inr+2];
+    iq3              = facel*charge[inr+3];
+
+    jq1              = charge[inr+1];
+    jq2              = charge[inr+2];
+    jq3              = charge[inr+3];
+    qq11             = iq1*jq1;
+    qq12             = iq1*jq2;
+    qq13             = iq1*jq3;
+    qq21             = iq2*jq1;
+    qq22             = iq2*jq2;
+    qq23             = iq2*jq3;
+    qq31             = iq3*jq1;
+    qq32             = iq3*jq2;
+    qq33             = iq3*jq3;
+
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        ix1              = shX + x[i_coord_offset+DIM*1+XX];
+        iy1              = shY + x[i_coord_offset+DIM*1+YY];
+        iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
+        ix2              = shX + x[i_coord_offset+DIM*2+XX];
+        iy2              = shY + x[i_coord_offset+DIM*2+YY];
+        iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
+        ix3              = shX + x[i_coord_offset+DIM*3+XX];
+        iy3              = shY + x[i_coord_offset+DIM*3+YY];
+        iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
+
+        fix1             = 0.0;
+        fiy1             = 0.0;
+        fiz1             = 0.0;
+        fix2             = 0.0;
+        fiy2             = 0.0;
+        fiz2             = 0.0;
+        fix3             = 0.0;
+        fiy3             = 0.0;
+        fiz3             = 0.0;
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            jx1              = x[j_coord_offset+DIM*1+XX];
+            jy1              = x[j_coord_offset+DIM*1+YY];
+            jz1              = x[j_coord_offset+DIM*1+ZZ];
+            jx2              = x[j_coord_offset+DIM*2+XX];
+            jy2              = x[j_coord_offset+DIM*2+YY];
+            jz2              = x[j_coord_offset+DIM*2+ZZ];
+            jx3              = x[j_coord_offset+DIM*3+XX];
+            jy3              = x[j_coord_offset+DIM*3+YY];
+            jz3              = x[j_coord_offset+DIM*3+ZZ];
+
+            /* Calculate displacement vector */
+            dx11             = ix1 - jx1;
+            dy11             = iy1 - jy1;
+            dz11             = iz1 - jz1;
+            dx12             = ix1 - jx2;
+            dy12             = iy1 - jy2;
+            dz12             = iz1 - jz2;
+            dx13             = ix1 - jx3;
+            dy13             = iy1 - jy3;
+            dz13             = iz1 - jz3;
+            dx21             = ix2 - jx1;
+            dy21             = iy2 - jy1;
+            dz21             = iz2 - jz1;
+            dx22             = ix2 - jx2;
+            dy22             = iy2 - jy2;
+            dz22             = iz2 - jz2;
+            dx23             = ix2 - jx3;
+            dy23             = iy2 - jy3;
+            dz23             = iz2 - jz3;
+            dx31             = ix3 - jx1;
+            dy31             = iy3 - jy1;
+            dz31             = iz3 - jz1;
+            dx32             = ix3 - jx2;
+            dy32             = iy3 - jy2;
+            dz32             = iz3 - jz2;
+            dx33             = ix3 - jx3;
+            dy33             = iy3 - jy3;
+            dz33             = iz3 - jz3;
+
+            /* Calculate squared distance and things based on it */
+            rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
+            rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
+            rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
+            rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
+            rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
+            rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
+            rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
+            rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
+            rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
+
+            rinv11           = gmx_invsqrt(rsq11);
+            rinv12           = gmx_invsqrt(rsq12);
+            rinv13           = gmx_invsqrt(rsq13);
+            rinv21           = gmx_invsqrt(rsq21);
+            rinv22           = gmx_invsqrt(rsq22);
+            rinv23           = gmx_invsqrt(rsq23);
+            rinv31           = gmx_invsqrt(rsq31);
+            rinv32           = gmx_invsqrt(rsq32);
+            rinv33           = gmx_invsqrt(rsq33);
+
+            rinvsq11         = rinv11*rinv11;
+            rinvsq12         = rinv12*rinv12;
+            rinvsq13         = rinv13*rinv13;
+            rinvsq21         = rinv21*rinv21;
+            rinvsq22         = rinv22*rinv22;
+            rinvsq23         = rinv23*rinv23;
+            rinvsq31         = rinv31*rinv31;
+            rinvsq32         = rinv32*rinv32;
+            rinvsq33         = rinv33*rinv33;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq11*(rinv11*rinvsq11-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx11;
+            ty               = fscal*dy11;
+            tz               = fscal*dz11;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq12*(rinv12*rinvsq12-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx12;
+            ty               = fscal*dy12;
+            tz               = fscal*dz12;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq13*(rinv13*rinvsq13-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx13;
+            ty               = fscal*dy13;
+            tz               = fscal*dz13;
+
+            /* Update vectorial force */
+            fix1            += tx;
+            fiy1            += ty;
+            fiz1            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq21*(rinv21*rinvsq21-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx21;
+            ty               = fscal*dy21;
+            tz               = fscal*dz21;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq22*(rinv22*rinvsq22-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx22;
+            ty               = fscal*dy22;
+            tz               = fscal*dz22;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq23*(rinv23*rinvsq23-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx23;
+            ty               = fscal*dy23;
+            tz               = fscal*dz23;
+
+            /* Update vectorial force */
+            fix2            += tx;
+            fiy2            += ty;
+            fiz2            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq31*(rinv31*rinvsq31-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx31;
+            ty               = fscal*dy31;
+            tz               = fscal*dz31;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*1+XX] -= tx;
+            f[j_coord_offset+DIM*1+YY] -= ty;
+            f[j_coord_offset+DIM*1+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq32*(rinv32*rinvsq32-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx32;
+            ty               = fscal*dy32;
+            tz               = fscal*dz32;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*2+XX] -= tx;
+            f[j_coord_offset+DIM*2+YY] -= ty;
+            f[j_coord_offset+DIM*2+ZZ] -= tz;
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            felec            = qq33*(rinv33*rinvsq33-krf2);
+
+            fscal            = felec;
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx33;
+            ty               = fscal*dy33;
+            tz               = fscal*dz33;
+
+            /* Update vectorial force */
+            fix3            += tx;
+            fiy3            += ty;
+            fiz3            += tz;
+            f[j_coord_offset+DIM*3+XX] -= tx;
+            f[j_coord_offset+DIM*3+YY] -= ty;
+            f[j_coord_offset+DIM*3+ZZ] -= tz;
+
+            /* Inner loop uses 234 flops */
+        }
+        /* End of innermost loop */
+
+        tx = ty = tz = 0;
+        f[i_coord_offset+DIM*1+XX] += fix1;
+        f[i_coord_offset+DIM*1+YY] += fiy1;
+        f[i_coord_offset+DIM*1+ZZ] += fiz1;
+        tx                         += fix1;
+        ty                         += fiy1;
+        tz                         += fiz1;
+        f[i_coord_offset+DIM*2+XX] += fix2;
+        f[i_coord_offset+DIM*2+YY] += fiy2;
+        f[i_coord_offset+DIM*2+ZZ] += fiz2;
+        tx                         += fix2;
+        ty                         += fiy2;
+        tz                         += fiz2;
+        f[i_coord_offset+DIM*3+XX] += fix3;
+        f[i_coord_offset+DIM*3+YY] += fiy3;
+        f[i_coord_offset+DIM*3+ZZ] += fiz3;
+        tx                         += fix3;
+        ty                         += fiy3;
+        tz                         += fiz3;
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses 30 flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_F,outeriter*30 + inneriter*234);
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c

index bcb08bfb585a35f86947a656c9b7ed87bc291c96..25dbb9c0753b9d6482d82a5b7b21bbf9f8c0f213 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.c
@@ -39,6 +39,7 @@
  #include "smalloc.h"
  
  #include "nb_kernel_allvsall.h"
+#include "nrnb.h"
  
  typedef struct 
  {
@@ -231,16 +232,13 @@ setup_aadata(gmx_allvsall_data_t **  p_aadata,
  
  
  void
-nb_kernel_allvsall(t_forcerec *           fr,
-                                  t_mdatoms *            mdatoms,
-                                  t_blocka *             excl,    
-                                  real *                 x,
-                                  real *                 f,
-                                  real *                 Vc,
-                                  real *                 Vvdw,
-                                  int *                  outeriter,
-                                  int *                  inneriter,
-                                  void *                 work)
+nb_kernel_allvsall(t_nblist *                nlist,
+                   rvec *                    xx,
+                   rvec *                    ff,
+                   t_forcerec *              fr,
+                   t_mdatoms *               mdatoms,
+                   nb_kernel_data_t *        kernel_data,
+                   t_nrnb *                  nrnb)
  {
         gmx_allvsall_data_t *aadata;
         int        natoms;
@@ -263,20 +261,30 @@ nb_kernel_allvsall(t_forcerec *           fr,
      real       vcoul,vctot;
      real       c6,c12,Vvdw6,Vvdw12,Vvdwtot;
      real       fscal;
-    
+    t_blocka * excl;
+    real *     f;
+    real *     x;
+    real *     Vvdw;
+    real *     Vc;
+
+    x                   = xx[0];
+    f                   = ff[0];
         charge              = mdatoms->chargeA;
         type                = mdatoms->typeA;
         facel               = fr->epsfac;
      natoms              = mdatoms->nr;
         ni0                 = mdatoms->start;
         ni1                 = mdatoms->start+mdatoms->homenr;
-    
-    aadata = *((gmx_allvsall_data_t **)work);
+    aadata              = fr->AllvsAll_work;
+    excl                = kernel_data->exclusions;
+
+    Vc                  = kernel_data->energygrp_elec;
+    Vvdw                = kernel_data->energygrp_vdw;
  
         if(aadata==NULL)
         {
                 setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
-        *((gmx_allvsall_data_t **)work) = aadata;
+        fr->AllvsAll_work  = aadata;
         }
          
         for(i=ni0; i<ni1; i++)
@@ -430,9 +438,10 @@ nb_kernel_allvsall(t_forcerec *           fr,
                 /* Outer loop uses 6 flops/iteration */
         }    
        
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = ni1-ni0;         
-    *inneriter       = (ni1-ni0)*natoms/2;         
+    /* 12 flops per outer iteration
+     * 19 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
  }
  
  
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h

index d8ca9d2a046b27e773adaaf7b8aac51b925cf64e..950a316faf6a2c0abf6a74b605b179ab2b97c969 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsall.h
@@ -8,17 +8,16 @@
  
  #include "types/simple.h"
  #include "typedefs.h"
+#include "../nb_kernel.h"
  
  void
-nb_kernel_allvsall(t_forcerec *           fr,
-                  t_mdatoms *            mdatoms,
-                  t_blocka *             excl,    
-                  real *                 x,
-                  real *                 f,
-                  real *                 Vc,
-                  real *                 Vvdw,
-                  int *                  outeriter,
-                  int *                  inneriter,
-                  void *                 work);
+nb_kernel_allvsall(t_nblist *                nlist,
+                   rvec *                    x,
+                   rvec *                    f,
+                   t_forcerec *              fr,
+                   t_mdatoms *               mdatoms,
+                   nb_kernel_data_t *        kernel_data,
+                   t_nrnb *                  nrnb);
+
  
  #endif
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c

index 809fb8ccce7d746e0f831815bad89633edbf7ada..0709b7fd572fd6e8086c8765bf25fd894c8e5d23 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.c
@@ -39,6 +39,7 @@
  #include "smalloc.h"
  
  #include "nb_kernel_allvsallgb.h"
+#include "nrnb.h"
  
  typedef struct 
  {
@@ -232,17 +233,13 @@ setup_aadata(gmx_allvsall_data_t **  p_aadata,
  
  
  void
-nb_kernel_allvsallgb(t_forcerec *           fr,
-                     t_mdatoms *            mdatoms,
-                     t_blocka *             excl,    
-                     real *                 x,
-                     real *                 f,
-                     real *                 Vc,
-                     real *                 Vvdw,
-                     real *                 vpol,
-                     int *                  outeriter,
-                     int *                  inneriter,
-                     void *                 work)
+nb_kernel_allvsallgb(t_nblist *                nlist,
+                     rvec *                    xx,
+                     rvec *                    ff,
+                     t_forcerec *              fr,
+                     t_mdatoms *               mdatoms,
+                     nb_kernel_data_t *        kernel_data,
+                     t_nrnb *                  nrnb)
  {
         gmx_allvsall_data_t *aadata;
         int        natoms;
@@ -273,26 +270,39 @@ nb_kernel_allvsallgb(t_forcerec *           fr,
      real       fscal,dvdatmp,fijC,vgb;
      real       Y,F,Fp,Geps,Heps2,VV,FF,eps,eps2,r,rt;
      real       dvdaj,gbscale,isaprod,isai,isaj,gbtabscale;
-    
+    real *     f;
+    real *     x;
+    t_blocka * excl;
+    real *     Vvdw;
+    real *     Vc;
+    real *     vpol;
+
+    x                   = xx[0];
+    f                   = ff[0];
         charge              = mdatoms->chargeA;
         type                = mdatoms->typeA;
      gbfactor            = ((1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent));
         facel               = fr->epsfac;
-    GBtab               = fr->gbtab.tab;
+    GBtab               = fr->gbtab.data;
      gbtabscale          = fr->gbtab.scale;
      invsqrta            = fr->invsqrta;
      dvda                = fr->dvda;
-    
+    vpol                = kernel_data->energygrp_polarization;
+
      natoms              = mdatoms->nr;
         ni0                 = mdatoms->start;
         ni1                 = mdatoms->start+mdatoms->homenr;
      
-    aadata = *((gmx_allvsall_data_t **)work);
+    aadata              = fr->AllvsAll_work;
+    excl                = kernel_data->exclusions;
+
+    Vc                  = kernel_data->energygrp_elec;
+    Vvdw                = kernel_data->energygrp_vdw;
  
         if(aadata==NULL)
         {
                 setup_aadata(&aadata,excl,natoms,type,fr->ntype,fr->nbfp);
-        *((gmx_allvsall_data_t **)work) = aadata;
+        fr->AllvsAll_work  = aadata;
         }
  
         for(i=ni0; i<ni1; i++)
@@ -506,9 +516,10 @@ nb_kernel_allvsallgb(t_forcerec *           fr,
                 /* Outer loop uses 6 flops/iteration */
         }    
  
-    /* Write outer/inner iteration count to pointers */
-    *outeriter       = ni1-ni0;         
-    *inneriter       = (ni1-ni0)*natoms/2;         
+    /* 12 flops per outer iteration
+     * 19 flops per inner iteration
+     */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,(ni1-ni0)*12 + ((ni1-ni0)*natoms/2)*19);
  }
  
  
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h

index 2661ff7f3f97bf66e91a798164cc174efd392a56..2c3ea9194e034eae55ba64bcc533db6264a3a64a 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_allvsallgb.h
@@ -8,18 +8,15 @@
  
  #include "types/simple.h"
  #include "typedefs.h"
+#include "../nb_kernel.h"
  
  void
-nb_kernel_allvsallgb(t_forcerec *           fr,
-                     t_mdatoms *            mdatoms,
-                     t_blocka *             excl,    
-                     real *                 x,
-                     real *                 f,
-                     real *                 Vc,
-                     real *                 Vvdw,
-                     real *                 vpol,
-                     int *                  outeriter,
-                     int *                  inneriter,
-                     void *                 work);
+nb_kernel_allvsallgb(t_nblist *                nlist,
+                     rvec *                    x,
+                     rvec *                    f,
+                     t_forcerec *              fr,
+                     t_mdatoms *               mdatoms,
+                     nb_kernel_data_t *        kernel_data,
+                     t_nrnb *                  nrnb);
  
  #endif
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c

index 500f0f1fe877bc308344fe116a729ceea3b4bce5..c33cadd829e9c5d5cfd2d4a4b0190a48227cdf53 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.c
@@ -1,264 +1,641 @@
  /*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
   *                This source code is part of
   *
   *                 G   R   O   M   A   C   S
   *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
+ * Copyright (c) 2001-2012, The GROMACS Development Team
   *
   * Gromacs is a library for molecular simulation and trajectory analysis,
   * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
   * a full list of developers and information, check out http://www.gromacs.org
   *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
   * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
   *
   * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
+ * the papers people have written on it - you can find them on the website.
   */
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdio.h>
-
-#include "types/nrnb.h"
-#include "nb_kernel_c.h"
-#include "../nb_kerneltype.h"
+#ifndef nb_kernel_c_h
+#define nb_kernel_c_h
  
+#include "../nb_kernel.h"
  
-/* Include standard kernel headers in local directory */
-#include "nb_kernel010.h"
-#include "nb_kernel020.h"
-#include "nb_kernel030.h"
-#include "nb_kernel100.h"
-#include "nb_kernel101.h"
-#include "nb_kernel102.h"
-#include "nb_kernel103.h"
-#include "nb_kernel104.h"
-#include "nb_kernel110.h"
-#include "nb_kernel111.h"
-#include "nb_kernel112.h"
-#include "nb_kernel113.h"
-#include "nb_kernel114.h"
-#include "nb_kernel120.h"
-#include "nb_kernel121.h"
-#include "nb_kernel122.h"
-#include "nb_kernel123.h"
-#include "nb_kernel124.h"
-#include "nb_kernel130.h"
-#include "nb_kernel131.h"
-#include "nb_kernel132.h"
-#include "nb_kernel133.h"
-#include "nb_kernel134.h"
-#include "nb_kernel200.h"
-#include "nb_kernel201.h"
-#include "nb_kernel202.h"
-#include "nb_kernel203.h"
-#include "nb_kernel204.h"
-#include "nb_kernel210.h"
-#include "nb_kernel211.h"
-#include "nb_kernel212.h"
-#include "nb_kernel213.h"
-#include "nb_kernel214.h"
-#include "nb_kernel220.h"
-#include "nb_kernel221.h"
-#include "nb_kernel222.h"
-#include "nb_kernel223.h"
-#include "nb_kernel224.h"
-#include "nb_kernel230.h"
-#include "nb_kernel231.h"
-#include "nb_kernel232.h"
-#include "nb_kernel233.h"
-#include "nb_kernel234.h"
-#include "nb_kernel300.h"
-#include "nb_kernel301.h"
-#include "nb_kernel302.h"
-#include "nb_kernel303.h"
-#include "nb_kernel304.h"
-#include "nb_kernel310.h"
-#include "nb_kernel311.h"
-#include "nb_kernel312.h"
-#include "nb_kernel313.h"
-#include "nb_kernel314.h"
-#include "nb_kernel320.h"
-#include "nb_kernel321.h"
-#include "nb_kernel322.h"
-#include "nb_kernel323.h"
-#include "nb_kernel324.h"
-#include "nb_kernel330.h"
-#include "nb_kernel331.h"
-#include "nb_kernel332.h"
-#include "nb_kernel333.h"
-#include "nb_kernel334.h"
-#include "nb_kernel400.h"
-#include "nb_kernel410.h"
-#include "nb_kernel420.h"
-#include "nb_kernel430.h"
+nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c;
+nb_kernel_t nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c;
  
  
-static nb_kernel_t *
-kernellist[eNR_NBKERNEL_NR] = 
+nb_kernel_info_t
+kernellist_c[] =
  {
-    nb_kernel010,
-    nb_kernel020,
-    nb_kernel030,
-    nb_kernel100,
-    nb_kernel101,
-    nb_kernel102,
-    nb_kernel103,
-    nb_kernel104,
-    nb_kernel110,
-    nb_kernel111,
-    nb_kernel112,
-    nb_kernel113,
-    nb_kernel114,
-    nb_kernel120,
-    nb_kernel121,
-    nb_kernel122,
-    nb_kernel123,
-    nb_kernel124,
-    nb_kernel130,
-    nb_kernel131,
-    nb_kernel132,
-    nb_kernel133,
-    nb_kernel134,
-    nb_kernel200,
-    nb_kernel201,
-    nb_kernel202,
-    nb_kernel203,
-    nb_kernel204,
-    nb_kernel210,
-    nb_kernel211,
-    nb_kernel212,
-    nb_kernel213,
-    nb_kernel214,
-    nb_kernel220,
-    nb_kernel221,
-    nb_kernel222,
-    nb_kernel223,
-    nb_kernel224,
-    nb_kernel230,
-    nb_kernel231,
-    nb_kernel232,
-    nb_kernel233,
-    nb_kernel234,
-    nb_kernel300,
-    nb_kernel301,
-    nb_kernel302,
-    nb_kernel303,
-    nb_kernel304,
-    nb_kernel310,
-    nb_kernel311,
-    nb_kernel312,
-    nb_kernel313,
-    nb_kernel314,
-    nb_kernel320,
-    nb_kernel321,
-    nb_kernel322,
-    nb_kernel323,
-    nb_kernel324,
-    nb_kernel330,
-    nb_kernel331,
-    nb_kernel332,
-    nb_kernel333,
-    nb_kernel334,
-    nb_kernel400,
-    nb_kernel410,
-    nb_kernel430,
-       nb_kernel010nf,
-    nb_kernel020nf,
-    nb_kernel030nf,
-    nb_kernel100nf,
-    nb_kernel101nf,
-    nb_kernel102nf,
-    nb_kernel103nf,
-    nb_kernel104nf,
-    nb_kernel110nf,
-    nb_kernel111nf,
-    nb_kernel112nf,
-    nb_kernel113nf,
-    nb_kernel114nf,
-    nb_kernel120nf,
-    nb_kernel121nf,
-    nb_kernel122nf,
-    nb_kernel123nf,
-    nb_kernel124nf,
-    nb_kernel130nf,
-    nb_kernel131nf,
-    nb_kernel132nf,
-    nb_kernel133nf,
-    nb_kernel134nf,
-    nb_kernel200nf,
-    nb_kernel201nf,
-    nb_kernel202nf,
-    nb_kernel203nf,
-    nb_kernel204nf,
-    nb_kernel210nf,
-    nb_kernel211nf,
-    nb_kernel212nf,
-    nb_kernel213nf,
-    nb_kernel214nf,
-    nb_kernel220nf,
-    nb_kernel221nf,
-    nb_kernel222nf,
-    nb_kernel223nf,
-    nb_kernel224nf,
-    nb_kernel230nf,
-    nb_kernel231nf,
-    nb_kernel232nf,
-    nb_kernel233nf,
-    nb_kernel234nf,
-    nb_kernel300nf,
-    nb_kernel301nf,
-    nb_kernel302nf,
-    nb_kernel303nf,
-    nb_kernel304nf,
-    nb_kernel310nf,
-    nb_kernel311nf,
-    nb_kernel312nf,
-    nb_kernel313nf,
-    nb_kernel314nf,
-    nb_kernel320nf,
-    nb_kernel321nf,
-    nb_kernel322nf,
-    nb_kernel323nf,
-    nb_kernel324nf,
-    nb_kernel330nf,
-    nb_kernel331nf,
-    nb_kernel332nf,
-    nb_kernel333nf,
-    nb_kernel334nf,
-    nb_kernel400nf,
-    nb_kernel410nf,
-    nb_kernel430nf,
+    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_c", "c", "None", "None", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_c", "c", "None", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBham_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSh_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_VF_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecNone_VdwBhamSw_GeomP1P1_F_c", "c", "None", "None", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_c", "c", "Ewald", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_c", "c", "Ewald", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomP1P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecEw_VdwBham_GeomW3W3_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4P1_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_VF_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecEw_VdwBham_GeomW4W4_F_c", "c", "Ewald", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomP1P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW3W3_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4P1_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_VF_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecEwSh_VdwBhamSh_GeomW4W4_F_c", "c", "Ewald", "PotentialShift", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomP1P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW3W3_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4P1_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_VF_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecEwSw_VdwBhamSw_GeomW4W4_F_c", "c", "Ewald", "PotentialSwitch", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_c", "c", "Coulomb", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_c", "c", "Coulomb", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_c", "c", "Coulomb", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_c", "c", "Coulomb", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_c", "c", "Coulomb", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_c", "c", "Coulomb", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c", "c", "Coulomb", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomP1P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW3W3_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4P1_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_VF_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCoul_VdwBham_GeomW4W4_F_c", "c", "Coulomb", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomP1P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW3W3_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4P1_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_VF_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecCSTab_VdwBham_GeomW4W4_F_c", "c", "CubicSplineTable", "None", "Buckingham", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwLJ_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwNone_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwCSTab_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_VF_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecGB_VdwBham_GeomP1P1_F_c", "c", "GeneralizedBorn", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "LennardJones", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSh_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialShift", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomP1P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW3W3_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4P1_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_VF_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c, "nb_kernel_ElecRFCut_VdwBhamSw_GeomW4W4_F_c", "c", "ReactionField", "ExactCutoff", "Buckingham", "PotentialSwitch", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_c", "c", "ReactionField", "None", "LennardJones", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomP1P1_F_c", "c", "ReactionField", "None", "None", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3P1_F_c", "c", "ReactionField", "None", "None", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwNone_GeomW3W3_F_c", "c", "ReactionField", "None", "None", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4P1_F_c", "c", "ReactionField", "None", "None", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwNone_GeomW4W4_F_c", "c", "ReactionField", "None", "None", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_c", "c", "ReactionField", "None", "CubicSplineTable", "None", "Water4Water4", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomP1P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "ParticleParticle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c, "nb_kernel_ElecRF_VdwBham_GeomW3W3_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water3Water3", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4P1_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Particle", "", "Force" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_VF_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "PotentialAndForce" },
+    { nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c, "nb_kernel_ElecRF_VdwBham_GeomW4W4_F_c", "c", "ReactionField", "None", "Buckingham", "None", "Water4Water4", "", "Force" }
  };
  
+int
+kernellist_c_size = sizeof(kernellist_c)/sizeof(kernellist_c[0]);
  
-void
-nb_kernel_setup(FILE *log,nb_kernel_t **list)
-{
-  int i;
-  nb_kernel_t *p;
-
-  if(NULL != log)
-    fprintf(log,"Configuring standard C nonbonded kernels...\n");
-
-  for(i=0;i<eNR_NBKERNEL_NR;i++)
-  {
-    p = kernellist[i];
-    if(p!=NULL)
-      list[i] = p; 
-  }
-}    
+#endif
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h

index c24b95263bb19074540ed5d251b5cce075315c50..8aacf2bb70f9db39f90b34a959d4ed44206bdc54 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h
@@ -1,49 +1,36 @@
  /*
+ * Note: this file was generated by the Gromacs c kernel generator.
+ *
   *                This source code is part of
   *
   *                 G   R   O   M   A   C   S
   *
- * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
- * Copyright (c) 2001-2009, The GROMACS Development Team
+ * Copyright (c) 2001-2012, The GROMACS Development Team
   *
   * Gromacs is a library for molecular simulation and trajectory analysis,
   * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
   * a full list of developers and information, check out http://www.gromacs.org
   *
- * This program is free software; you can redistribute it and/or modify it under 
- * the terms of the GNU Lesser General Public License as published by the Free 
- * Software Foundation; either version 2 of the License, or (at your option) any 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option) any
   * later version.
- * As a special exception, you may use this file as part of a free software
- * library without restriction.  Specifically, if other files instantiate
- * templates or use macros or inline functions from this file, or you compile
- * this file and link it with other files to produce an executable, this
- * file does not by itself cause the resulting executable to be covered by
- * the GNU Lesser General Public License.  
- *
- * In plain-speak: do not worry about classes/macros/templates either - only
- * changes to the library have to be LGPL, not an application linking with it.
   *
   * To help fund GROMACS development, we humbly ask that you cite
- * the papers people have written on it - you can find them on the website!
- */
-#ifndef _NBKERNEL_H_
-#define _NBKERNEL_H_
-
-/** \file
- * \brief The vanilla nonbonded Gromacs kernels (portable, written in C).
- *
- * \internal
+ * the papers people have written on it - you can find them on the website.
   */
+#ifndef nb_kernel_c_h
+#define nb_kernel_c_h
  
-#include <stdio.h>
+#include "../nb_kernel.h"
  
-#include "../nb_kerneltype.h"
-#include "nb_kernel_allvsall.h"
-#include "nb_kernel_allvsallgb.h"
  
-void
-nb_kernel_setup(FILE *fplog,nb_kernel_t **list);
+/* List of kernels for this architecture with metadata about them */
+extern nb_kernel_info_t
+kernellist_c[];
  
-#endif /* _NBKERNEL_H_ */
+/* Length of kernellist_c */
+extern int
+kernellist_c_size;
  
+#endif
diff --git a/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre

new file mode 100644 (file)

index 0000000..275d0e6
--- /dev/null
+++ b/src/gmxlib/nonbonded/nb_kernel_c/nb_kernel_template_c.pre
@@ -0,0 +1,844 @@
+/* #if 0 */
+#error This file must be processed with the Gromacs pre-preprocessor
+/* #endif */
+/* #if INCLUDE_HEADER */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "../nb_kernel.h"
+#include "types/simple.h"
+#include "vec.h"
+#include "nrnb.h"
+/* #endif */
+
+/* ## List of variables set by the generating script:                                    */
+/* ##                                                                                    */
+/* ## Setttings that apply to the entire kernel:                                         */
+/* ## KERNEL_ELEC:           String, choice for electrostatic interactions               */
+/* ## KERNEL_VDW:            String, choice for van der Waals interactions               */
+/* ## KERNEL_NAME:           String, name of this kernel                                 */
+/* ## KERNEL_VF:             String telling if we calculate potential, force, or both    */
+/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 'Water3' or '1Particle' */
+/* ##                                                                                    */
+/* ## Setttings that apply to particles in the outer (I) or inner (J) loops:             */
+/* ## PARTICLES_I[]/         Arrays with lists of i/j particles to use in kernel. It is  */
+/* ## PARTICLES_J[]:         just [0] for particle geometry, but can be longer for water */
+/* ## PARTICLES_ELEC_I[]/    Arrays with lists of i/j particle that have electrostatics  */
+/* ## PARTICLES_ELEC_J[]:    interactions that should be calculated in this kernel.      */
+/* ## PARTICLES_VDW_I[]/     Arrays with the list of i/j particle that have VdW          */
+/* ## PARTICLES_VDW_J[]:     interactions that should be calculated in this kernel.      */
+/* ##                                                                                    */
+/* ## Setttings for pairs of interactions (e.g. 2nd i particle against 1st j particle)   */
+/* ## PAIRS_IJ[]:            Array with (i,j) tuples of pairs for which interactions     */
+/* ##                        should be calculated in this kernel. Zero-charge particles  */
+/* ##                        do not have interactions with particles without vdw, and    */
+/* ##                        Vdw-only interactions are not evaluated in a no-vdw-kernel. */
+/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */
+/* ##                        For each i-j pair, the element [I][J] is a list of strings  */
+/* ##                        defining properties/flags of this interaction. Examples     */
+/* ##                        include 'electrostatics'/'vdw' if that type of interaction  */
+/* ##                        should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values  */
+/* ##                        are needed, and 'exactcutoff' or 'shift','switch' to        */
+/* ##                        decide if the force/potential should be modified. This way  */
+/* ##                        we only calculate values absolutely needed for each case.   */
+
+/* ## Calculate the size and offset for (merged/interleaved) table data */
+
+/* #if ('CubicSplineTable' in [KERNEL_ELEC,KERNEL_VDW]) or KERNEL_VF=='PotentialAndForce' */
+/*     #define TABLE_POINT_SIZE 4 */
+/* #else */
+/*     #define TABLE_POINT_SIZE 2 */
+/* #endif */
+
+/* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
+/*     #define TABLE_NINTERACTIONS 3 */
+/*     #define TABLE_VDW_OFFSET TABLE_POINT_SIZE */
+/* #elif 'Table' in KERNEL_ELEC */
+/*     #define TABLE_NINTERACTIONS 1 */
+/* #elif 'Table' in KERNEL_VDW */
+/*     #define TABLE_NINTERACTIONS 2 */
+/*     #define TABLE_VDW_OFFSET 0 */
+/* #else */
+/*     #define TABLE_NINTERACTIONS 0 */
+/* #endif */
+
+/* #if 'Buckingham' in KERNEL_VDW */
+/*   #define NVDWPARAM 3 */
+/* #else */
+/*   #define NVDWPARAM 2 */
+/* #endif */
+
+/*
+ * Gromacs nonbonded kernel:   {KERNEL_NAME}
+ * Electrostatics interaction: {KERNEL_ELEC}
+ * VdW interaction:            {KERNEL_VDW}
+ * Geometry:                   {GEOMETRY_I}-{GEOMETRY_J}
+ * Calculate force/pot:        {KERNEL_VF}
+ */
+void
+{KERNEL_NAME}
+                    (t_nblist * gmx_restrict                nlist,
+                     rvec * gmx_restrict                    xx,
+                     rvec * gmx_restrict                    ff,
+                     t_forcerec * gmx_restrict              fr,
+                     t_mdatoms * gmx_restrict               mdatoms,
+                     nb_kernel_data_t * gmx_restrict        kernel_data,
+                     t_nrnb * gmx_restrict                  nrnb)
+{
+    /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */
+    /* ## so there is no point in going to extremes to exclude variables that are not needed. */
+    int              i_shift_offset,i_coord_offset,j_coord_offset;
+    int              j_index_start,j_index_end;
+    int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
+    real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
+    int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
+    real             *shiftvec,*fshift,*x,*f;
+    /* #for I in PARTICLES_I */
+    int              vdwioffset{I};
+    real             ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I};
+    /* #endfor */
+    /* #for J in PARTICLES_J */
+    int              vdwjidx{J};
+    real             jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J};
+    /* #endfor */
+    /* #for I,J in PAIRS_IJ */
+    real             dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J},cexp1_{I}{J},cexp2_{I}{J};
+    /* #endfor */
+    /* #if KERNEL_ELEC != 'None' */
+    real             velec,felec,velecsum,facel,crf,krf,krf2;
+    real             *charge;
+    /* #endif */
+    /* #if 'GeneralizedBorn' in KERNEL_ELEC */
+    int              gbitab;
+    real             vgb,fgb,vgbsum,dvdasum,gbscale,gbtabscale,isaprod,gbqqfactor,gbinvepsdiff,dvdaj,gbeps,dvdatmp;
+    real             *invsqrta,*dvda,*gbtab;
+    /* #endif */
+    /* #if KERNEL_VDW != 'None' */
+    int              nvdwtype;
+    real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
+    int              *vdwtype;
+    real             *vdwparam;
+    /* #endif */
+    /* #if 'Table' in KERNEL_ELEC or 'GeneralizedBorn' in KERNEL_ELEC or 'Table' in KERNEL_VDW */
+    int              vfitab;
+    real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
+    real             *vftab;
+    /* #endif */
+    /* #if 'Ewald' in KERNEL_ELEC */
+    int              ewitab;
+    real             ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace;
+    real             *ewtab;
+    /* #endif */
+    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
+    real             rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw;
+    /* #endif */
+
+    x                = xx[0];
+    f                = ff[0];
+
+    nri              = nlist->nri;
+    iinr             = nlist->iinr;
+    jindex           = nlist->jindex;
+    jjnr             = nlist->jjnr;
+    shiftidx         = nlist->shift;
+    gid              = nlist->gid;
+    shiftvec         = fr->shift_vec[0];
+    fshift           = fr->fshift[0];
+    /* #if KERNEL_ELEC != 'None' */
+    facel            = fr->epsfac;
+    charge           = mdatoms->chargeA;
+    /*     #if 'ReactionField' in KERNEL_ELEC */
+    krf              = fr->ic->k_rf;
+    krf2             = krf*2.0;
+    crf              = fr->ic->c_rf;
+    /*     #endif */
+    /* #endif */
+    /* #if KERNEL_VDW != 'None' */
+    nvdwtype         = fr->ntype;
+    vdwparam         = fr->nbfp;
+    vdwtype          = mdatoms->typeA;
+    /* #endif */
+
+    /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */
+    vftab            = kernel_data->table_elec_vdw->data;
+    vftabscale       = kernel_data->table_elec_vdw->scale;
+    /* #elif 'Table' in KERNEL_ELEC */
+    vftab            = kernel_data->table_elec->data;
+    vftabscale       = kernel_data->table_elec->scale;
+    /* #elif 'Table' in KERNEL_VDW */
+    vftab            = kernel_data->table_vdw->data;
+    vftabscale       = kernel_data->table_vdw->scale;
+    /* #endif */
+
+    /* #if 'Ewald' in KERNEL_ELEC */
+    sh_ewald         = fr->ic->sh_ewald;
+    /*     #if KERNEL_VF=='Force'  */
+    ewtab            = fr->ic->tabq_coul_F;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+    /*     #else */
+    ewtab            = fr->ic->tabq_coul_FDV0;
+    ewtabscale       = fr->ic->tabq_scale;
+    ewtabhalfspace   = 0.5/ewtabscale;
+     /*     #endif */
+    /* #endif */
+
+    /* #if KERNEL_ELEC=='GeneralizedBorn' */
+    invsqrta         = fr->invsqrta;
+    dvda             = fr->dvda;
+    gbtabscale       = fr->gbtab.scale;
+    gbtab            = fr->gbtab.data;
+    gbinvepsdiff     = (1.0/fr->epsilon_r) - (1.0/fr->gb_epsilon_solvent);
+    /* #endif */
+
+    /* #if 'Water' in GEOMETRY_I */
+    /* Setup water-specific parameters */
+    inr              = nlist->iinr[0];
+    /*     #for I in PARTICLES_ELEC_I */
+    iq{I}              = facel*charge[inr+{I}];
+    /*     #endfor */
+    /*     #for I in PARTICLES_VDW_I */
+    vdwioffset{I}      = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}];
+    /*     #endfor */
+    /* #endif */
+
+    /* #if 'Water' in GEOMETRY_J */
+    /*     #for J in PARTICLES_ELEC_J */
+    jq{J}              = charge[inr+{J}];
+    /*     #endfor */
+    /*     #for J in PARTICLES_VDW_J */
+    vdwjidx{J}         = {NVDWPARAM}*vdwtype[inr+{J}];
+    /*     #endfor */
+    /*     #for I,J in PAIRS_IJ */
+    /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+    qq{I}{J}             = iq{I}*jq{J};
+    /*         #endif */
+    /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+    /*             #if 'Buckingham' in KERNEL_VDW */
+    c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+    cexp1_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+    cexp2_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+2];
+    /*             #else */
+    c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+    c12_{I}{J}           = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+    /*             #endif */
+    /*         #endif */
+    /*     #endfor */
+    /* #endif */
+
+    /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */
+    /*     #if KERNEL_ELEC!='None' */
+    /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */
+    rcutoff          = fr->rcoulomb;
+    /*     #else */
+    rcutoff          = fr->rvdw;
+    /*     #endif */
+    rcutoff2         = rcutoff*rcutoff;
+    /* #endif */
+
+    /* #if KERNEL_MOD_VDW=='PotentialShift' */
+    sh_vdw_invrcut6  = fr->ic->sh_invrc6;
+    rvdw             = fr->rvdw;
+    /* #endif */
+
+    /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */
+    /*     #if KERNEL_MOD_ELEC=='PotentialSwitch'  */
+    rswitch          = fr->rcoulomb_switch;
+    /*     #else */
+    rswitch          = fr->rvdw_switch;
+    /*     #endif */
+    /* Setup switch parameters */
+    d                = rcutoff-rswitch;
+    swV3             = -10.0/(d*d*d);
+    swV4             =  15.0/(d*d*d*d);
+    swV5             =  -6.0/(d*d*d*d*d);
+    /*     #if 'Force' in KERNEL_VF */
+    swF2             = -30.0/(d*d*d);
+    swF3             =  60.0/(d*d*d*d);
+    swF4             = -30.0/(d*d*d*d*d);
+    /*     #endif */
+    /* #endif */
+
+    /* ## Keep track of the floating point operations we issue for reporting! */
+    /* #define OUTERFLOPS 0 */
+    /* #define INNERFLOPS 0 */
+    outeriter        = 0;
+    inneriter        = 0;
+
+    /* Start outer loop over neighborlists */
+    for(iidx=0; iidx<nri; iidx++)
+    {
+        /* Load shift vector for this list */
+        i_shift_offset   = DIM*shiftidx[iidx];
+        shX              = shiftvec[i_shift_offset+XX];
+        shY              = shiftvec[i_shift_offset+YY];
+        shZ              = shiftvec[i_shift_offset+ZZ];
+
+        /* Load limits for loop over neighbors */
+        j_index_start    = jindex[iidx];
+        j_index_end      = jindex[iidx+1];
+
+        /* Get outer coordinate index */
+        inr              = iinr[iidx];
+        i_coord_offset   = DIM*inr;
+
+        /* Load i particle coords and add shift vector */
+        /* ## Loop over i particles, but only include ones that we use - skip e.g. vdw-only sites for elec-only kernel */
+        /*     #for I in PARTICLES_I */
+        ix{I}              = shX + x[i_coord_offset+DIM*{I}+XX];
+        iy{I}              = shY + x[i_coord_offset+DIM*{I}+YY];
+        iz{I}              = shZ + x[i_coord_offset+DIM*{I}+ZZ];
+        /*     #define OUTERFLOPS OUTERFLOPS+3 */
+        /* #endfor */
+
+        /* #if 'Force' in KERNEL_VF */
+        /*     #for I in PARTICLES_I */
+        fix{I}             = 0.0;
+        fiy{I}             = 0.0;
+        fiz{I}             = 0.0;
+        /*     #endfor */
+        /* #endif */
+
+        /* ## For water we already preloaded parameters at the start of the kernel */
+        /* #if not 'Water' in GEOMETRY_I */
+        /* Load parameters for i particles */
+        /*     #for I in PARTICLES_ELEC_I */
+        iq{I}              = facel*charge[inr+{I}];
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*         #if KERNEL_ELEC=='GeneralizedBorn' */
+        isai{I}            = invsqrta[inr+{I}];
+        /*         #endif */
+        /*     #endfor */
+        /*     #for I in PARTICLES_VDW_I */
+        vdwioffset{I}      = {NVDWPARAM}*nvdwtype*vdwtype[inr+{I}];
+        /*     #endfor */
+        /* #endif */
+
+        /* #if 'Potential' in KERNEL_VF */
+        /* Reset potential sums */
+        /*     #if KERNEL_ELEC != 'None' */
+        velecsum         = 0.0;
+        /*     #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
+        vgbsum           = 0.0;
+        /*     #endif */
+        /*     #if KERNEL_VDW != 'None' */
+        vvdwsum          = 0.0;
+        /*     #endif */
+        /* #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
+        dvdasum          = 0.0;
+        /*     #endif */
+
+        /* Start inner kernel loop */
+        for(jidx=j_index_start; jidx<j_index_end; jidx++)
+        {
+            /* Get j neighbor index, and coordinate index */
+            jnr              = jjnr[jidx];
+            j_coord_offset   = DIM*jnr;
+
+            /* load j atom coordinates */
+            /* #for J in PARTICLES_J */
+            jx{J}              = x[j_coord_offset+DIM*{J}+XX];
+            jy{J}              = x[j_coord_offset+DIM*{J}+YY];
+            jz{J}              = x[j_coord_offset+DIM*{J}+ZZ];
+            /* #endfor */
+
+            /* Calculate displacement vector */
+            /* #for I,J in PAIRS_IJ */
+            dx{I}{J}             = ix{I} - jx{J};
+            dy{I}{J}             = iy{I} - jy{J};
+            dz{I}{J}             = iz{I} - jz{J};
+            /*     #define INNERFLOPS INNERFLOPS+3 */
+            /* #endfor */
+
+            /* Calculate squared distance and things based on it */
+            /* #for I,J in PAIRS_IJ */
+            rsq{I}{J}            = dx{I}{J}*dx{I}{J}+dy{I}{J}*dy{I}{J}+dz{I}{J}*dz{I}{J};
+            /*     #define INNERFLOPS INNERFLOPS+5 */
+            /* #endfor */
+
+            /* #for I,J in PAIRS_IJ */
+            /*     #if 'rinv' in INTERACTION_FLAGS[I][J] */
+            rinv{I}{J}           = gmx_invsqrt(rsq{I}{J});
+            /*         #define INNERFLOPS INNERFLOPS+5 */
+            /*     #endif */
+            /* #endfor */
+
+            /* #for I,J in PAIRS_IJ */
+            /*     #if 'rinvsq' in INTERACTION_FLAGS[I][J] */
+            /*         # if 'rinv' not in INTERACTION_FLAGS[I][J] */
+            rinvsq{I}{J}         = 1.0/rsq{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+4 */
+            /*         #else */
+            rinvsq{I}{J}         = rinv{I}{J}*rinv{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*     #endif */
+            /* #endfor */
+
+            /* #if not 'Water' in GEOMETRY_J */
+            /* Load parameters for j particles */
+            /*     #for J in PARTICLES_ELEC_J */
+            jq{J}              = charge[jnr+{J}];
+            /*         #if KERNEL_ELEC=='GeneralizedBorn' */
+            isaj{J}           = invsqrta[jnr+{J}];
+            /*         #endif */
+            /*     #endfor */
+            /*     #for J in PARTICLES_VDW_J */
+            vdwjidx{J}         = {NVDWPARAM}*vdwtype[jnr+{J}];
+            /*     #endfor */
+            /* #endif */
+
+            /* #for I,J in PAIRS_IJ */
+
+            /**************************
+             * CALCULATE INTERACTIONS *
+             **************************/
+
+            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
+            /*         ## We always calculate rinv/rinvsq above to enable pipelineing in compilers (performance tested on x86) */
+            if (rsq{I}{J}<rcutoff2)
+            {
+                /*     #if 0    ## this and the next two lines is a hack to maintain auto-indentation in template file */
+            }
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'r' in INTERACTION_FLAGS[I][J] */
+            r{I}{J}              = rsq{I}{J}*rinv{I}{J};
+            /*         #define INNERFLOPS INNERFLOPS+1 */
+            /*     #endif */
+
+            /*     ## For water geometries we already loaded parameters at the start of the kernel */
+            /*     #if not 'Water' in GEOMETRY_J */
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            qq{I}{J}             = iq{I}*jq{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+            /*             #if KERNEL_VDW=='Buckingham' */
+            c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+            cexp1_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+            cexp2_{I}{J}         = vdwparam[vdwioffset{I}+vdwjidx{J}+2];
+            /*             #else */
+            c6_{I}{J}            = vdwparam[vdwioffset{I}+vdwjidx{J}];
+            c12_{I}{J}           = vdwparam[vdwioffset{I}+vdwjidx{J}+1];
+            /*             #endif */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'table' in INTERACTION_FLAGS[I][J] */
+            /* Calculate table index by multiplying r with table scale and truncate to integer */
+            rt               = r{I}{J}*vftabscale;
+            vfitab           = rt;
+            vfeps            = rt-vfitab;
+            vfitab           = {TABLE_NINTERACTIONS}*{TABLE_POINT_SIZE}*vfitab;
+            /*         #define INNERFLOPS INNERFLOPS+2 */
+            /*     #endif */
+
+            /*     ## ELECTROSTATIC INTERACTIONS */
+            /*     #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+
+            /*         #if KERNEL_ELEC=='Coulomb' */
+
+            /* COULOMB ELECTROSTATICS */
+            velec            = qq{I}{J}*rinv{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = velec*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+2 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='ReactionField' */
+
+            /* REACTION-FIELD ELECTROSTATICS */
+            /*             #if 'Potential' in KERNEL_VF */
+            velec            = qq{I}{J}*(rinv{I}{J}+krf*rsq{I}{J}-crf);
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = qq{I}{J}*(rinv{I}{J}*rinvsq{I}{J}-krf2);
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='GeneralizedBorn' */
+
+            /* GENERALIZED BORN AND COULOMB ELECTROSTATICS */
+            isaprod          = isai{I}*isaj{J};
+            gbqqfactor       = isaprod*(-qq{I}{J})*gbinvepsdiff;
+            gbscale          = isaprod*gbtabscale;
+            dvdaj            = dvda[jnr+{J}];
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+
+            /* Calculate generalized born table index - this is a separate table from the normal one,
+             * but we use the same procedure by multiplying r with scale and truncating to integer.
+             */
+            rt               = r{I}{J}*gbscale;
+            gbitab           = rt;
+            gbeps            = rt-gbitab;
+            gbitab           = 4*gbitab;
+
+            Y                = gbtab[gbitab];
+            F                = gbtab[gbitab+1];
+            Geps             = gbeps*gbtab[gbitab+2];
+            Heps2            = gbeps*gbeps*gbtab[gbitab+3];
+            Fp               = F+Geps+Heps2;
+            VV               = Y+gbeps*Fp;
+            vgb              = gbqqfactor*VV;
+            /*             #define INNERFLOPS INNERFLOPS+10 */
+
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fgb              = gbqqfactor*FF*gbscale;
+            dvdatmp          = -0.5*(vgb+fgb*r{I}{J});
+            dvdasum          = dvdasum + dvdatmp;
+            dvda[jnr]        = dvdaj+dvdatmp*isaj{J}*isaj{J};
+            /*                 #define INNERFLOPS INNERFLOPS+13 */
+            /*             #endif */
+            velec            = qq{I}{J}*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if 'Force' in KERNEL_VF */
+            felec            = (velec*rinv{I}{J}-fgb)*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='Ewald' */
+            /* EWALD ELECTROSTATICS */
+
+            /* Calculate Ewald table index by multiplying r with scale and truncate to integer */
+            ewrt             = r{I}{J}*ewtabscale;
+            ewitab           = ewrt;
+            eweps            = ewrt-ewitab;
+            /*             #define INNERFLOPS INNERFLOPS+2 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_ELEC=='PotentialSwitch' */
+            ewitab           = 4*ewitab;
+            felec            = ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*                 #if KERNEL_MOD_ELEC=='PotentialShift' */
+            velec            = qq{I}{J}*((rinv{I}{J}-sh_ewald)-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            /*                     #define INNERFLOPS INNERFLOPS+7 */
+            /*                 #else */
+            velec            = qq{I}{J}*(rinv{I}{J}-(ewtab[ewitab+2]-ewtabhalfspace*eweps*(ewtab[ewitab]+felec)));
+            /*                     #define INNERFLOPS INNERFLOPS+6 */
+            /*                 #endif */
+            /*                 #if 'Force' in KERNEL_VF */
+            felec            = qq{I}{J}*rinv{I}{J}*(rinvsq{I}{J}-felec);
+            /*                      #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #endif */
+            /*             #elif KERNEL_VF=='Force' */
+            felec            = (1.0-eweps)*ewtab[ewitab]+eweps*ewtab[ewitab+1];
+            felec            = qq{I}{J}*rinv{I}{J}*(rinvsq{I}{J}-felec);
+            /*                 #define INNERFLOPS INNERFLOPS+7 */
+            /*             #endif */
+
+            /*         #elif KERNEL_ELEC=='CubicSplineTable' */
+
+            /* CUBIC SPLINE TABLE ELECTROSTATICS */
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab];
+            /*             #endif */
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            velec            = qq{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            felec            = -qq{I}{J}*FF*vftabscale*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+7 */
+            /*             #endif */
+            /*         #endif */
+            /*         ## End of check for electrostatics interaction forms */
+            /*     #endif */
+            /*     ## END OF ELECTROSTATIC INTERACTION CHECK FOR PAIR I-J */
+
+            /*     #if 'vdw' in INTERACTION_FLAGS[I][J] */
+
+            /*         #if KERNEL_VDW=='LennardJones' */
+
+            /* LENNARD-JONES DISPERSION/REPULSION */
+
+            rinvsix          = rinvsq{I}{J}*rinvsq{I}{J}*rinvsq{I}{J};
+            /*             #define INNERFLOPS INNERFLOPS+2 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch' */
+            vvdw6            = c6_{I}{J}*rinvsix;
+            vvdw12           = c12_{I}{J}*rinvsix*rinvsix;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
+            vvdw             = (vvdw12 - c12_{I}{J}*sh_vdw_invrcut6*sh_vdw_invrcut6)*(1.0/12.0) - (vvdw6 - c6_{I}{J}*sh_vdw_invrcut6)*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+8 */
+            /*                 #else */
+            vvdw             = vvdw12*(1.0/12.0) - vvdw6*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+3 */
+            /*                 #endif */
+            /*                 ## Check for force inside potential check, i.e. this means we already did the potential part */
+            /*                 #if 'Force' in KERNEL_VF */
+            fvdw             = (vvdw12-vvdw6)*rinvsq{I}{J};
+            /*                     #define INNERFLOPS INNERFLOPS+2 */
+            /*                 #endif */
+            /*             #elif KERNEL_VF=='Force' */
+            /*                 ## Force-only LennardJones makes it possible to save 1 flop (they do add up...) */
+            fvdw             = (c12_{I}{J}*rinvsix-c6_{I}{J})*rinvsix*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+
+            /*         #elif KERNEL_VDW=='Buckingham' */
+
+            /* BUCKINGHAM DISPERSION/REPULSION */
+            rinvsix          = rinvsq{I}{J}*rinvsq{I}{J}*rinvsq{I}{J};
+            vvdw6            = c6_{I}{J}*rinvsix;
+            br               = cexp2_{I}{J}*r{I}{J};
+            vvdwexp          = cexp1_{I}{J}*exp(-br);
+            /*             ## Estimate exp() to 25 flops */
+            /*             #define INNERFLOPS INNERFLOPS+31 */
+            /*             #if 'Potential' in KERNEL_VF or KERNEL_MOD_VDW=='PotentialSwitch'  */
+            /*                 #if KERNEL_MOD_VDW=='PotentialShift' */
+            vvdw             = (vvdwexp-cexp1_{I}{J}*exp(-cexp2_{I}{J}*rvdw)) - (vvdw6 - c6_{I}{J}*sh_vdw_invrcut6)*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+33 */
+            /*                  #else */
+            vvdw             = vvdwexp - vvdw6*(1.0/6.0);
+            /*                     #define INNERFLOPS INNERFLOPS+2 */
+            /*                 #endif */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            fvdw             = (br*vvdwexp-vvdw6)*rinvsq{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+
+            /*         #elif KERNEL_VDW=='CubicSplineTable' */
+
+            /* CUBIC SPLINE TABLE DISPERSION */
+            vfitab          += {TABLE_VDW_OFFSET};
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab];
+            /*             #endif */
+            F                = vftab[vfitab+1];
+            Geps             = vfeps*vftab[vfitab+2];
+            Heps2            = vfeps*vfeps*vftab[vfitab+3];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            vvdw6            = c6_{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw6            = c6_{I}{J}*FF;
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+
+            /* CUBIC SPLINE TABLE REPULSION */
+            /*             #if 'Force' in KERNEL_VF */
+            Y                = vftab[vfitab+4];
+            /*             #endif */
+            F                = vftab[vfitab+5];
+            Geps             = vfeps*vftab[vfitab+6];
+            Heps2            = vfeps*vfeps*vftab[vfitab+7];
+            Fp               = F+Geps+Heps2;
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*             #if 'Potential' in KERNEL_VF */
+            VV               = Y+vfeps*Fp;
+            vvdw12           = c12_{I}{J}*VV;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            FF               = Fp+Geps+2.0*Heps2;
+            fvdw12           = c12_{I}{J}*FF;
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*             #if 'Potential' in KERNEL_VF */
+            vvdw             = vvdw12+vvdw6;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*             #if 'Force' in KERNEL_VF */
+            fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv{I}{J};
+            /*                 #define INNERFLOPS INNERFLOPS+4 */
+            /*             #endif */
+            /*         #endif */
+            /*         ## End of check for vdw interaction forms */
+            /*     #endif */
+            /*     ## END OF VDW INTERACTION CHECK FOR PAIR I-J */
+
+            /*     #if 'switch' in INTERACTION_FLAGS[I][J] */
+            d                = r{I}{J}-rswitch;
+            d                = (d>0.0) ? d : 0.0;
+            d2               = d*d;
+            sw               = 1.0+d2*d*(swV3+d*(swV4+d*swV5));
+            /*         #define INNERFLOPS INNERFLOPS+9 */
+
+            /*         #if 'Force' in KERNEL_VF */
+            dsw              = d2*(swF2+d*(swF3+d*swF4));
+            /*             #define INNERFLOPS INNERFLOPS+5 */
+            /*         #endif */
+
+            /* Evaluate switch function */
+            /*         #if 'Potential' in KERNEL_VF */
+            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
+            velec           *= sw;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
+            vvdw            *= sw;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*         #endif */
+            /*         #if 'Force' in KERNEL_VF */
+            /*             #if 'electrostatics' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_ELEC=='PotentialSwitch' */
+            felec            = felec*sw + velec*dsw;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*             #if 'vdw' in INTERACTION_FLAGS[I][J] and KERNEL_MOD_VDW=='PotentialSwitch' */
+            fvdw             = fvdw*sw + vvdw*dsw;
+            /*                 #define INNERFLOPS INNERFLOPS+3 */
+            /*             #endif */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'Potential' in KERNEL_VF */
+            /* Update potential sums from outer loop */
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            velecsum        += velec;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*             #if KERNEL_ELEC=='GeneralizedBorn' */
+            vgbsum          += vgb;
+            /*                 #define INNERFLOPS INNERFLOPS+1 */
+            /*             #endif */
+            /*         #endif */
+            /*         #if 'vdw' in INTERACTION_FLAGS[I][J] */
+            vvdwsum         += vvdw;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #endif */
+            /*     #endif */
+
+            /*     #if 'Force' in KERNEL_VF */
+
+            /*         #if 'electrostatics' in INTERACTION_FLAGS[I][J] and 'vdw' in INTERACTION_FLAGS[I][J] */
+            fscal            = felec+fvdw;
+            /*             #define INNERFLOPS INNERFLOPS+1 */
+            /*         #elif 'electrostatics' in INTERACTION_FLAGS[I][J] */
+            fscal            = felec;
+            /*         #elif 'vdw' in INTERACTION_FLAGS[I][J] */
+            fscal            = fvdw;
+            /*        #endif */
+
+            /* Calculate temporary vectorial force */
+            tx               = fscal*dx{I}{J};
+            ty               = fscal*dy{I}{J};
+            tz               = fscal*dz{I}{J};
+
+            /* Update vectorial force */
+            fix{I}            += tx;
+            fiy{I}            += ty;
+            fiz{I}            += tz;
+            f[j_coord_offset+DIM*{J}+XX] -= tx;
+            f[j_coord_offset+DIM*{J}+YY] -= ty;
+            f[j_coord_offset+DIM*{J}+ZZ] -= tz;
+
+            /*         #define INNERFLOPS INNERFLOPS+9 */
+            /*     #endif */
+
+            /*     #if 'exactcutoff' in INTERACTION_FLAGS[I][J] */
+            /*         #if 0    ## This and next two lines is a hack to maintain indentation in template file */
+            {
+                /*     #endif */
+            }
+            /*     #endif */
+            /*    ## End of check for the interaction being outside the cutoff */
+
+            /* #endfor */
+            /* ## End of loop over i-j interaction pairs */
+
+            /* Inner loop uses {INNERFLOPS} flops */
+        }
+        /* End of innermost loop */
+
+        /* #if 'Force' in KERNEL_VF */
+        tx = ty = tz = 0;
+        /*     #for I in PARTICLES_I */
+        f[i_coord_offset+DIM*{I}+XX] += fix{I};
+        f[i_coord_offset+DIM*{I}+YY] += fiy{I};
+        f[i_coord_offset+DIM*{I}+ZZ] += fiz{I};
+        tx                         += fix{I};
+        ty                         += fiy{I};
+        tz                         += fiz{I};
+        /*         #define OUTERFLOPS OUTERFLOPS+6 */
+        /*     #endfor */
+        fshift[i_shift_offset+XX]  += tx;
+        fshift[i_shift_offset+YY]  += ty;
+        fshift[i_shift_offset+ZZ]  += tz;
+        /*     #define OUTERFLOPS OUTERFLOPS+3 */
+        /* #endif */
+
+        /* #if 'Potential' in KERNEL_VF */
+        ggid                        = gid[iidx];
+        /* Update potential energies */
+        /*     #if KERNEL_ELEC != 'None' */
+        kernel_data->energygrp_elec[ggid] += velecsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC */
+        kernel_data->energygrp_polarization[ggid] += vgbsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /*     #if KERNEL_VDW != 'None' */
+        kernel_data->energygrp_vdw[ggid] += vvdwsum;
+        /*         #define OUTERFLOPS OUTERFLOPS+1 */
+        /*     #endif */
+        /* #endif */
+        /*     #if 'GeneralizedBorn' in KERNEL_ELEC and 'Force' in KERNEL_VF */
+        dvda[nri]                   = dvda[nri] + dvdasum*isai{I}*isai{I};
+        /*     #endif */
+
+        /* Increment number of inner iterations */
+        inneriter                  += j_index_end - j_index_start;
+
+        /* Outer loop uses {OUTERFLOPS} flops */
+    }
+
+    /* Increment number of outer iterations */
+    outeriter        += nri;
+
+    /* Update outer/inner flops */
+    /* ## NB: This is not important, it just affects the flopcount. However, since our preprocessor is */
+    /* ## primitive and replaces aggressively even in strings inside these directives, we need to      */
+    /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source.      */
+    /* #if GEOMETRY_I == 'Water3'            */
+    /*     #define ISUFFIX '_W3'             */
+    /* #elif GEOMETRY_I == 'Water4'          */
+    /*     #define ISUFFIX '_W4'             */
+    /* #else                                 */
+    /*     #define ISUFFIX ''                */
+    /* #endif                                */
+    /* #if GEOMETRY_J == 'Water3'            */
+    /*     #define JSUFFIX 'W3'              */
+    /* #elif GEOMETRY_J == 'Water4'          */
+    /*     #define JSUFFIX 'W4'              */
+    /* #else                                 */
+    /*     #define JSUFFIX ''                */
+    /* #endif                                */
+    /* #if 'PotentialAndForce' in KERNEL_VF  */
+    /*     #define VFSUFFIX  '_VF'           */
+    /* #elif 'Potential' in KERNEL_VF        */
+    /*     #define VFSUFFIX '_V'             */
+    /* #else                                 */
+    /*     #define VFSUFFIX '_F'             */
+    /* #endif                                */
+
+    /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #elif KERNEL_ELEC != 'None' */
+    inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #else */
+    inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS});
+    /* #endif  */
+}
diff --git a/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.c b/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.c

index 686b57dfd387a81c8b696082f59b3880e4fa6961..f36f988f8bb552cffcb8bd00bbb51c631df804e0 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.c
+++ b/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.c
@@ -35,7 +35,7 @@
  
  #include "types/nrnb.h"
  #include "nb_kernel_f77_double.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  /* Include standard kernel headers in local directory */
diff --git a/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.h b/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.h

index 1ab0761f808fe77b3c90335a978c03b51edc93c7..18a5f3ec268df13685da5ea44d92b75c91840874 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.h
+++ b/src/gmxlib/nonbonded/nb_kernel_f77_double/nb_kernel_f77_double.h
@@ -39,7 +39,7 @@
  
  #include <stdio.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  void
  nb_kernel_setup_f77_double(FILE *fplog,nb_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.c b/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.c

index 35edd9b9ed6627707bc69a65ab96e389ae8ba78b..493873195d1c434fab767df8cd80285809fdeda7 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.c
+++ b/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.c
@@ -35,7 +35,7 @@
  
  #include "types/nrnb.h"
  #include "nb_kernel_f77_single.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  /* Include standard kernel headers in local directory */
diff --git a/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.h b/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.h

index 0498c519272901cd4190ac3567683c133844c03b..0b3ed0604b850a80f9696fd8e0ae98c5e4b25504 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.h
+++ b/src/gmxlib/nonbonded/nb_kernel_f77_single/nb_kernel_f77_single.h
@@ -39,7 +39,7 @@
  
  #include <stdio.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  void
  nb_kernel_setup_f77_single(FILE *fplog,nb_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.c b/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.c

index 8d77ad11af0e87cc489a0a416837b50ffe90dec1..5682952996ab99ca8f31df2923f9aa0b30bcf7a6 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.c
+++ b/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.c
@@ -35,7 +35,7 @@
  
  #include "types/nrnb.h"
  #include "nb_kernel_power6.h"
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  /* Include standard kernel headers in local directory */
diff --git a/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.h b/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.h

index 213280d07867bbbb2d0dc89f91edc951338a5a8f..e2d6c5c2abe5c982c71bbab64b5a10d3eaa358cc 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.h
+++ b/src/gmxlib/nonbonded/nb_kernel_power6/nb_kernel_power6.h
@@ -39,7 +39,7 @@
  
  #include <stdio.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  void
  nb_kernel_setup_power6(FILE *fplog,nb_kernel_t **list);
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c

index bd94698769803db94128cdf3c73694f26d0b1c76..7954df51ae5400522892bfc9aaaed3cd3f93d1c8 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel400_sse2_double.c
@@ -23,7 +23,7 @@
  #include <gmx_sse2_double.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  void nb_kernel400_sse2_double(int *           p_nri,
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c

index 11ec2e97e716552bd9b4ca990f202e2240df69c3..780c941b546a59a25b2611a2a71a8e77b0af32da 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel410_sse2_double.c
@@ -23,7 +23,7 @@
  #include <gmx_sse2_double.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c

index 3c11023c37d97bf248062bcdb0d634526d342908..85c0832aefcdf478f7edf62d48ef43e5dd90b119 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
@@ -23,7 +23,7 @@
  #include <gmx_sse2_double.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  void nb_kernel430_sse2_double(int *           p_nri,
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c

index 7b0b6a0af0f4c58d84f3d4139af5de9f893d1cf0..4626eba996a69576407d90aaa30f8bc0cbd0ef23 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.c
@@ -36,7 +36,7 @@
  #include <stdlib.h>
  #include <stdio.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  #include "nb_kernel_sse2_double.h"
  
  static nb_kernel_t *
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h

index 1c6735c53a7f76d95dcfaecfe03f0147a114231a..c6e1a6615d6b3ca481b45db84f64fd064e680619 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h
@@ -27,7 +27,7 @@
  
  #include <types/simple.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c

index 33989e3222fef962b9fead59b18b6df191bf8ef3..e11930e1d282a718d9bb47b5b413eb924f9128a6 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel400_sse2_single.c
@@ -23,7 +23,7 @@
  #include <gmx_sse2_single.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  void nb_kernel400_sse2_single(int *           p_nri,
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c

index 4b567fa0f36a8a1e6b082f0735ccf2492054533c..18b99ec7e7b631d0a58d4ed9cab0715dc721d374 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel410_sse2_single.c
@@ -23,7 +23,7 @@
  #include <gmx_sse2_single.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  void nb_kernel410_sse2_single(int *           p_nri,
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c

index 7e3102350cbe58b503a3341cd4de37c3ce95c4da..c06c1452ef6d21dcb4caf40949aade89277b07ac 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel430_sse2_single.c
@@ -24,7 +24,7 @@
  #include <gmx_sse2_single.h>
  
  /* get gmx_gbdata_t */
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  
  
  void nb_kernel430_sse2_single(int *           p_nri,
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c

index 4daf48c015145ce79b7d54e4e488e0f71ff0fb71..6a2d881c15f92a4363accaaed778cf7fd89273ea 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.c
@@ -41,7 +41,7 @@
  #include <intrin.h>
  #endif
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  #include "nb_kernel_sse2_single.h"
  
  static nb_kernel_t *
diff --git a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h

index fa70a01244565353f4af27b80ee9c3f7607e1c97..29412ffb4d76f7db85bb92fffe7e8819e81e4465 100644 (file)
--- a/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h
+++ b/src/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h
@@ -27,7 +27,7 @@
  
  #include <types/simple.h>
  
-#include "../nb_kerneltype.h"
+#include "../nb_kernel.h"
  #include "nb_kernel_allvsall_sse2_single.h"
  #include "nb_kernel_allvsallgb_sse2_single.h"
  
diff --git a/src/gmxlib/nonbonded/nb_kerneltype.h b/src/gmxlib/nonbonded/nb_kerneltype.h

deleted file mode 100644 (file)

index 6c8e443..0000000
--- a/src/gmxlib/nonbonded/nb_kerneltype.h
+++ /dev/null
@@ -1,381 +0,0 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- 
- *
- * Gromacs 4.0                         Copyright (c) 1991-2003
- * David van der Spoel, Erik Lindahl, University of Groningen.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * To help us fund GROMACS development, we humbly ask that you cite
- * the research papers on the package. Check out http://www.gromacs.org
- * 
- * And Hey:
- * Gnomes, ROck Monsters And Chili Sauce
- */
-
-#ifndef _NB_KERNEL_H_
-#define _NB_KERNEL_H_
-
-#include <types/simple.h>
-
-/** \file
- *  \brief Function interfaces and nonbonded kernel meta-data.
- *
- *  \internal
- *
- *  This file contains the definitions of the call sequences and 
- *  parameter documetation for the nonbonded kernels, both the 
- *  optimized level1/level2 kernels in Fortran or C, and the more 
- *  general level0 normal and free energy kernels. 
- *  It also defines an information structure used to record
- *  data about nonbonded kernels, their type of call sequence, 
- *  and the number of flops used in the outer and inner loops.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-#if 0
-} /* fixes auto-indentation problems */
-#endif
-
-
-
-
-/* Temporary structure to be able to pass 2 GB data through the
- * work data pointer argument.
- */
-typedef struct 
-{
-       real      gb_epsilon_solvent;  /* Epsilon for solvent */
-       real      epsilon_r;           /* Epsilon for inner dielectric */
-       real *    gpol;                /* Polarization energy group */
-} gmx_gbdata_t;
-       
-       
-/** Interface to level1 optimized nonbonded kernels.
- * 
- *  \internal
- *
- *  To simplify the jungle of nonbonded function calls in earlier
- *  versions of Gromacs, we have changed most nonbonded kernels
- *  to use the same call sequence, even if some of the parameters
- *  are ununsed in a particular kernel.
- *
- *  This function call sequence is used by all level1 kernels. When this is
- *  written it is actually used by the level2 kernels too, but that might
- *  change in the future - don't assume they are identical.
- *  All arguments are passed as pointers to make to interface identical across 
- *  the Fortran and C implementations. For the same reason (Fortran & assembly) 
- *  we cannot use structures.
- *
- *  The nonbonded kernels should never be called directly - use
- *  the setup and wrapper routines in gmx_nonbonded.h instead.
- *
- *  \param nri        Number of neighborlists/i-particles/outer-particles.
- *                    The outer loop of the nonbonded kernel will iterate
- *                    over these indices. This number must always
- *                    be provided,
- *  \param iinr       Atom number (starting on 0) for each index 0..(nri-1)
- *                    in the outer loop. These are the 'owner'/'parent' 
- *                    particles of each neighborlist. It is quite common
- *                    that a particle occurs several times with different
- *                    shift indices. This is the field 'ilist' in 
- *                    the neighborlist. This array must always be provided.
- *  \param jindex     This array specifies the index  of the first j neighbor
- *                    in the array jjnr[] which belongs to the corresponding
- *                    i atom. The j neighbors for all lists are merged into
- *                    one continous array, so we also get the first index for
- *                    the next list in the next element. The size of this list
- *                    is nri+1, and the last position contains the number of
- *                    entries in the j list. Confused? It is actually pretty
- *                    straightforward; for index <tt>i</tt> the i atom is 
- *                    <tt>iinr[i]</tt>, and the atom indices of the neighbors 
- *                    to interact with are in positions <tt>jindex[i]</tt> 
- *                    through <tt>(jindex[i+1]-1)</tt> in <tt>jjnr[]</tt>.
- *                    This corresponds to ilist_jindex in the neighborlist.
- *                    This array must always be provided.
- *  \param jjnr       Array with the j particle neighbors of list i in positions
- *                    <tt>jindex[i]</tt> through <tt>(jindex[i+1]-1)</tt>. This
- *                    field is called jlist in the neighborlist structure.
- *                    This array must always be provided.
- *  \param shift      Array with shift vector index corresponding to each 
- *                    iinr neighborlist. In most codes, periodic boundary 
- *                    conditions are applied by checking pairwise distances 
- *                    of particles, and correcting differences of more than 
- *                    half a box. This is costly (latencies) and unnecessary 
- *                    since we already decided which periodic copy to use during
- *                    neighborsearching. We solve this by introducing 
- *                    \a shiftvectors and a \a shiftindex. The shiftvectors is 
- *                    normally an array of 3*3*3 vectors, corresponding to 
- *                    displacements +- one box unit in one or more directions.
- *                    The central box (no displacement) is always in the middle,
- *                    index 13 in the case of 3*3*3 shift vectors.
- *                    Imagine a particle interacting with two particles in the
- *                    current box, and three particles in the box to the left. 
- *                    We represent this with two ilist entries, one with shift 
- *                    index=13 (central box) and one with shift index=12. 
- *                    all neighbors in each sublist will have the same shift,  
- *                    so in the nonbonded interaction, we only have to add the 
- *                    shift vector to the outer (i) particle coordinates before
- *                    starting the loop over neighbors. This extracts periodic 
- *                    boundary condition calculations from the inner loops, 
- *                    increasing performance significantly. This array must
- *                    always be provided, but if you do not use any periodic
- *                    boundary conditions you could set all elements to zero
- *                    and provide a shiftvec array of length three, with all
- *                    elements there too being zero.
- *  \param shiftvec   The shift vectors for each index. Since the code needs to
- *                    interface with Fortran or assembly, we have to use 
- *                    pointers to float/double instead of the gmx_rvec_t type.
- *                    The x/y/z shift distances for shift index idx are in
- *                    <tt>shiftvec[3*idx]</tt>, <tt>shiftvec[3*idx+1]</tt>, 
- *                    and <tt>shiftvec[3*idx+2]</tt>. This is fully binary 
- *                    compatible with an array of gmx_rvec_t, so you can simply
- *                    use a typecast when calling. The length of the array is
- *                    three times the number of shift indices. This array
- *                    must be provided, but see the comment for shift.
- *  \param fshift     Forces from 'other periodic boxes' for each shift index.
- *                    The shift concept does not affect the forces on individual
- *                    atoms, but it will have an effect on the virial. To 
- *                    account for this, we add the total force on the i particle 
- *                    in each outer loop to the current shift index in this 
- *                    array, which has exactly the same dimensions as the 
- *                    shiftvec array above. The Gromacs manual describes how 
- *                    this is used to calculate the effect in the virial.
- *                    Dimension is identical to shiftvec.
- *                    For kernels that do not calculate forces this can be NULL.
- *  \param gid        Energy group index for each i neighborlist index; this
- *                    corresponds to ilist_groupindex in the neighborlist and is
- *                    used to decompose energies into groupwise contributions.
- *                    If an i particle belongs to group A, and has interactions 
- *                    both with groups A,B, and C, you will get three different 
- *                    i list entries with energy group indices corresponding to 
- *                    A-A, A-B, and A-C neighbors, in addition to possible
- *                    copies due to different shift indices. The energies will 
- *                    be added to this index (i.e. index=gid[i]) in the vc and 
- *                    vnb arrays. This array must always be provided.
- *                    If you don't want to decompose energies all groups should 
- *                    be set to index 0 (length or the array nri).
- *  \param pos        Coordinates of atoms, starting on 0 as all arrays. To
- *                    interface with fortran and assembly this is a simple list
- *                    of floats, with x/y/z of atom n in positions
- *                    <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
- *                    <tt>pos[3*n+2]</tt>. This is binary compatible with,
- *                    and can be typecast from, an array of gmx_rvec_t.
- *                    This array must always be provided.
- *  \param faction    Forces of atoms. To interface with fortran and assembly 
- *                    this is a simple list of floats, with x/y/z of atom n in 
- *                    positions <tt>pos[3*n]</tt>, <tt>pos[3*n+1]</tt>, and
- *                    <tt>pos[3*n+2]</tt>. This is binary compatible with,
- *                    and can be typecast from, an array of gmx_rvec_t.
- *                    For kernels that do not calculate forces this can be NULL.
- *  \param charge     Array with the charge of atom n in position n.
- *                    If you are calling a kernel without coulomb interaction
- *                    this parameter can safely be set to NULL.
- *  \param facel      Factor to multiply all electrostatic interactions with;
- *                    this is essentially 1/(4*pi*epsilon). For kernels without
- *                    coulomb interaction the value will not be used. In the
- *                    fortran version (pass-by-reference) it can be NULL in
- *                    that case.
- *  \param krf        The 'k' constant for reaction-field electrostatic
- *                    interactions. Can be zero/NULL for non-RF interactions.
- *  \param crf        The 'c' constant for reaction-field electrostatic
- *                    interactions. Can be zero/NULL for non-RF interactions.
- *  \param vc         List of Coulomb energies, the length is equal to the
- *                    number of energy group combinations. As described for
- *                    gid above, the coulomb energy of each outer/i-particle
- *                    list will be added to vc[gid[i]]. Can be NULL for
- *                    kernels without electrostatic interaction.
- *  \param type       Array with the index of the Van der Waals type for each
- *                    atom, starting on 0 . This is used to look up the VdW 
- *                    parameters. When no VdW interactions are calculated 
- *                    (i.e. only Coulomb), this can be set to NULL. 
- *  \param ntype      Number of VdW types, used to look up the VdW parameters. 
- *                    This parameter can be zero (NULL for pass-by-reference)
- *                    if the kernel does not include VdW interactions.
- *  \param vdwparam   Array with Van der Waals parameters. The contents and 
- *                    size depends on the number of parameters required for
- *                    the selected VdW interaction (3 for B-ham, 2 for LJ, etc).
- *                    There are a total of ntype*ntype pair combinations, and 
- *                    the size of this array is the number of such pairs times
- *                    the number of parameters.
- *                    With 'nparam' parameters, and 'ntype' types in total,
- *                    the first parameter for interactions between atoms of VdW 
- *                    type 'ti' and VdW type 'tj' is located in
- *                    <tt>vdwparam[nparam*(ntype*ti+tj)]</tt>, and the 
- *                    additional parameters follow directly in the next 
- *                    position(s). Note that the array corresponds to a 
- *                    symmetric matrix - you should get the same parameters if
- *                    you swap ti and tj. This parameter can be NULL 
- *                    if the kernel does not include VdW interactions.
- *  \param vvdw       List of Van der Waals energies, the length is equal to 
- *                    the number of energy group combinations. As described 
- *                    for gid above, the coulomb energy of each outer list
- *                    will be added to vc[gid[i]]. Can be NULL for
- *                    kernels without electrostatic interaction.
- *  \param tabscale   Distance between table points in the vftab table.
- *                    This is always the same for Coulomb and VdW when both
- *                    are tabulated. If the kernel does not include any
- *                    tabulated interactions it can be zero, or NULL when
- *                    passed-by-reference.
- *  \param vftab      Table data for Coulomb and/or VdW interactions. 
- *                    Each 'point' in the table consists of the four 
- *                    floating-point numbers Y,F,G,H as described in the
- *                    Gromacs manual. If the kernel only tabulates the 
- *                    Coulomb interaction this is followed by the next
- *                    point. If both Coulomb and Van der Waals interactions 
- *                    are tabulated it is instead followed by another
- *                    four numbers for the dispersion table at the same
- *                    point, and then finally the repulsion table, i.e.
- *                    a total of 12 numbers per table point. If only 
- *                    Van der Waals interactions are tabulated the 
- *                    Dispersion and Repulsion table (in that order) use
- *                    a total of 8 floating-point numbers per point.
- *                    This array only needs to be provided for kernels with
- *                    tabulated interactions - otherwise it can be NULL.
- *  \param invsqrta   Array with the inverse square root of the born radius
- *                    of each atom. This can safely be NULL when the kernel
- *                    does not calculate Generalized Born interactions.
- *  \param dvda       Array where the derivative of the potential with respect
- *                    to the born radius for each atom will be written. This
- *                    is necessary to calculate the effect of born radius
- *                    derivatives on forces. However, just as for the force
- *                    arrays it must be provided even when not calculating
- *                    forces, since an implementation might not provide a
- *                    non-force version of the routine. When the kernel does
- *                    not calculate Generalized Born interactions it can 
- *                    safely be set to NULL, though.
- *  \param gbtabscale Distance between (scaled) table points for tabulated 
- *                    Generalized Born interactions. If the kernel does not 
- *                    calculate Generalized Born interactions it can be zero, 
- *                    or NULL when passed-by-reference. Note that this is 
- *                    usually different from the standard (non-GB) table scale.
- *  \param gbtab      Table data for Generalized Born Coulomb interactions.
- *                    Since these interactions contain parameters that 
- *                    enter in a non-trivial way I have introduced a trick
- *                    where the tabulated interaction is a function of
- *                    the distance scaled with the square roots of the two
- *                    born radii. Apart from this it contains similar 
- *                    data (Y,F,G,H) as the normal Coulomb/VdW tables.
- *                    See comments in the table code for details. This
- *                    can safely be set to NULL if you are not using
- *                    Generalized Born interactions.
- *  \param nthreads   Number of threads calling the kernel concurrently.
- *                    This value is only used to optimize the chunk size
- *                    of the neighborlists processed by each thread, so
- *                    it won't cause incorrect results if you get it wrong.
- *  \param count      Pointer to a common counter to synchronize 
- *                    multithreaded execution. This is normally the counter
- *                    in the neighborlist. It must be set to zero before
- *                    calling the kernel to get correct results. Note that
- *                    this is necessary even for a single thread if Gromacs
- *                    is compiled with thread support (meaning it is
- *                    a mandatory parameter).
- *  \param mtx        Pointer to a mutex protecting the counter, masked as 
- *                    a pointer-to-void since thread support is optional. 
- *                    However, if Gromacs is compiled with threads you \a must 
- *                    provide a mutex - it would hurt performance to check
- *                    it for each iteration in the nonbonded kernel. The
- *                    mutex is normally created automatically by the 
- *                    neighborlist initialization routine gmx_nlist_init().
- *                    (Yet another reason not to call kernels directly).
- *  \param outeriter  The number of iterations performed in the outer loop
- *                    of the kernel will be written to this integer. If
- *                    multiple threads are calling the loop this will be
- *                    the part of nri handled by this thread. 
- *                    This value is only used to get accurate flop accounting.
- *                    Since we cannot check for NULL pointers in Fortran it must
- *                    always be provided, but you can discard
- *                    the result if you don't need it.
- *  \param inneriter  The number of iterations performed in the inner loop
- *                    of the kernel will be written to this integer; compare
- *                    with nouter. Since we cannot check for NULL pointers in
- *                    Fortran it must always be provided, but you can discard
- *                    the result if you don't need it.
- *  \param work       Double precision work array - to be used for optimization.
- *
- *  \warning          There is \a very little error control inside
- *                    the nonbonded kernels in order to maximize performance.
- *                    It cannot be overemphasized that you should never call
- *                    them directly, but rely on the higher-level routines.
- */
-typedef void
-nb_kernel_t(int *             nri,
-            int *             iinr,
-            int *             jindex,
-            int *             jjnr,
-            int *             shift,
-            real *            shiftvec,
-            real *            fshift,
-            int *             gid,
-            real *            pos,
-            real *            faction,
-            real *            charge,
-            real *            facel,
-            real *            krf,
-            real *            crf,
-            real *            vc,
-            int *             type,
-            int *             ntype,
-            real *            vdwparam,
-            real *            vvdw,
-            real *            tabscale,
-            real *            vftab,
-            real *            invsqrta,
-            real *            dvda,
-            real *            gbtabscale,
-            real *            gbtab,
-            int *             nthreads,
-            int *             count,
-            void *            mtx,
-            int *             outeriter,
-            int *             inneriter,
-            real *            work);
-
-
-
-typedef void
-nb_adress_kernel_t(int *             nri,
-            int *             iinr,
-            int *             jindex,
-            int *             jjnr,
-            int *             shift,
-            real *            shiftvec,
-            real *            fshift,
-            int *             gid,
-            real *            pos,
-            real *            faction,
-            real *            charge,
-            real *            facel,
-            real *            krf,
-            real *            crf,
-            real *            vc,
-            int *             type,
-            int *             ntype,
-            real *            vdwparam,
-            real *            vvdw,
-            real *            tabscale,
-            real *            vftab,
-            real *            invsqrta,
-            real *            dvda,
-            real *            gbtabscale,
-            real *            gbtab,
-            int *             nthreads,
-            int *             count,
-            void *            mtx,
-            int *             outeriter,
-            int *             inneriter,
-            real              force_cap,
-            real *            wf);
-
-
-
-#ifdef __cplusplus
-}
-#endif
-
-
-#endif /* _NB_KERNEL_H_ */
diff --git a/src/gmxlib/nonbonded/nonbonded.c b/src/gmxlib/nonbonded/nonbonded.c

index d0120c03fa88385b586da6327ca2be438573df8e..8efe33e40fc88ed09b640b80e1d31c2e12d0e791 100644 (file)
--- a/src/gmxlib/nonbonded/nonbonded.c
+++ b/src/gmxlib/nonbonded/nonbonded.c
@@ -37,11 +37,10 @@
  #include <config.h>
  #endif
  
-#ifdef GMX_THREAD_SHM_FDECOMP
+#ifdef GMX_THREAD_MPI
  #include <thread_mpi.h>
  #endif
  
-
  #include <stdio.h>
  #include <stdlib.h>
  #include "typedefs.h"
@@ -51,6 +50,7 @@
  #include "vec.h"
  #include "maths.h"
  #include "macros.h"
+#include "string2.h"
  #include "force.h"
  #include "names.h"
  #include "main.h"
@@ -63,323 +63,161 @@
  #include "smalloc.h"
  #include "nonbonded.h"
  
+#include "nb_kernel.h"
  #include "nb_kernel_c/nb_kernel_c.h"
-#include "nb_kernel_adress_c/nb_kernel_c_adress.h"
+
  #include "nb_free_energy.h"
  #include "nb_generic.h"
  #include "nb_generic_cg.h"
  #include "nb_generic_adress.h"
  
  
-/* 1,4 interactions uses kernel 330 directly */
-#include "nb_kernel_c/nb_kernel330.h" 
-#include "nb_kernel_adress_c/nb_kernel330_adress.h"
-
-
-
-#if 0 && defined (GMX_X86_SSE2)
-#  ifdef GMX_DOUBLE
-#    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
-#  else
-#    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
-#  endif
-#endif
  
-#if defined(GMX_FORTRAN)
-#  ifdef GMX_DOUBLE
-#    include "nb_kernel_f77_double/nb_kernel_f77_double.h"
-#  else
-#    include "nb_kernel_f77_single/nb_kernel_f77_single.h"
-#  endif
+#ifdef GMX_THREAD_MPI
+static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
  #endif
+static gmx_bool            nonbonded_setup_done  = FALSE;
  
  
-#ifdef GMX_POWER6
-#include "nb_kernel_power6/nb_kernel_power6.h"
+void
+gmx_nonbonded_setup(FILE *         fplog,
+                    t_forcerec *   fr,
+                    gmx_bool       bGenericKernelOnly)
+{
+#ifdef GMX_THREAD_MPI
+    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
  #endif
+    /* Here we are guaranteed only one thread made it. */
+    if(nonbonded_setup_done==FALSE)
+    {
+        if(bGenericKernelOnly==FALSE)
+        {
+            /* Add the generic kernels to the structure stored statically in nb_kernel.c */
  
-#ifdef GMX_BLUEGENE
-#include "nb_kernel_bluegene/nb_kernel_bluegene.h"
-#endif
+            /* Add interaction-specific C kernels */
+            nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
  
+            /* Add interaction-specific accelerated SSE kernels, etc. here */
+        }
+        /* Create a hash for faster lookups */
+        nb_kernel_list_hash_init();
  
+        nonbonded_setup_done=TRUE;
+    }
+#ifdef GMX_THREAD_MPI
+    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
+#endif
+}
  
-enum { TABLE_NONE, TABLE_COMBINED, TABLE_COUL, TABLE_VDW, TABLE_NR };
  
  
-/* Table version for each kernel.
- */
-static const int 
-nb_kernel_table[eNR_NBKERNEL_NR] = 
+void
+gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
  {
-  TABLE_NONE,     /* kernel010 */
-  TABLE_NONE,     /* kernel020 */
-  TABLE_VDW,      /* kernel030 */
-  TABLE_NONE,     /* kernel100 */
-  TABLE_NONE,     /* kernel101 */
-  TABLE_NONE,     /* kernel102 */
-  TABLE_NONE,     /* kernel103 */
-  TABLE_NONE,     /* kernel104 */
-  TABLE_NONE,     /* kernel110 */
-  TABLE_NONE,     /* kernel111 */
-  TABLE_NONE,     /* kernel112 */
-  TABLE_NONE,     /* kernel113 */
-  TABLE_NONE,     /* kernel114 */
-  TABLE_NONE,     /* kernel120 */
-  TABLE_NONE,     /* kernel121 */
-  TABLE_NONE,     /* kernel122 */
-  TABLE_NONE,     /* kernel123 */
-  TABLE_NONE,     /* kernel124 */
-  TABLE_VDW,      /* kernel130 */
-  TABLE_VDW,      /* kernel131 */
-  TABLE_VDW,      /* kernel132 */
-  TABLE_VDW,      /* kernel133 */
-  TABLE_VDW,      /* kernel134 */
-  TABLE_NONE,     /* kernel200 */
-  TABLE_NONE,     /* kernel201 */
-  TABLE_NONE,     /* kernel202 */
-  TABLE_NONE,     /* kernel203 */
-  TABLE_NONE,     /* kernel204 */
-  TABLE_NONE,     /* kernel210 */
-  TABLE_NONE,     /* kernel211 */
-  TABLE_NONE,     /* kernel212 */
-  TABLE_NONE,     /* kernel213 */
-  TABLE_NONE,     /* kernel214 */
-  TABLE_NONE,     /* kernel220 */
-  TABLE_NONE,     /* kernel221 */
-  TABLE_NONE,     /* kernel222 */
-  TABLE_NONE,     /* kernel223 */
-  TABLE_NONE,     /* kernel224 */
-  TABLE_VDW,      /* kernel230 */
-  TABLE_VDW,      /* kernel231 */
-  TABLE_VDW,      /* kernel232 */
-  TABLE_VDW,      /* kernel233 */
-  TABLE_VDW,      /* kernel234 */
-  TABLE_COUL,     /* kernel300 */
-  TABLE_COUL,     /* kernel301 */
-  TABLE_COUL,     /* kernel302 */
-  TABLE_COUL,     /* kernel303 */
-  TABLE_COUL,     /* kernel304 */
-  TABLE_COUL,     /* kernel310 */
-  TABLE_COUL,     /* kernel311 */
-  TABLE_COUL,     /* kernel312 */
-  TABLE_COUL,     /* kernel313 */
-  TABLE_COUL,     /* kernel314 */
-  TABLE_COUL,     /* kernel320 */
-  TABLE_COUL,     /* kernel321 */
-  TABLE_COUL,     /* kernel322 */
-  TABLE_COUL,     /* kernel323 */
-  TABLE_COUL,     /* kernel324 */
-  TABLE_COMBINED, /* kernel330 */
-  TABLE_COMBINED, /* kernel331 */
-  TABLE_COMBINED, /* kernel332 */
-  TABLE_COMBINED, /* kernel333 */
-  TABLE_COMBINED, /* kernel334 */
-  TABLE_NONE,     /* kernel400 */
-  TABLE_NONE,     /* kernel410 */
-  TABLE_VDW       /* kernel430 */
-};
-
+    const char *     elec;
+    const char *     elec_mod;
+    const char *     vdw;
+    const char *     vdw_mod;
+    const char *     geom;
+    const char *     other;
+    const char *     vf;
+
+    const char *     archs[] = { "c" };
+    int              i;
+
+    if(nonbonded_setup_done==FALSE)
+    {
+        /* We typically call this setup routine before starting timers,
+         * but if that has not been done for whatever reason we do it now.
+         */
+        gmx_nonbonded_setup(NULL,NULL,FALSE);
+    }
  
+    /* Not used yet */
+    other="";
  
-static nb_kernel_t **
-nb_kernel_list = NULL;
+    nl->kernelptr_vf = NULL;
+    nl->kernelptr_v  = NULL;
+    nl->kernelptr_f  = NULL;
  
-static nb_adress_kernel_t **
-nb_kernel_list_adress = NULL;
+    elec     = gmx_nbkernel_elec_names[nl->ielec];
+    elec_mod = eintmod_names[nl->ielecmod];
+    vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
+    vdw_mod  = eintmod_names[nl->ivdwmod];
+    geom     = gmx_nblist_geometry_names[nl->igeometry];
  
-void
-gmx_setup_kernels(FILE *fplog,t_forcerec *fr,gmx_bool bGenericKernelOnly)
-{
-    int i;
-        
-    snew(nb_kernel_list,eNR_NBKERNEL_NR);
-    
-    /* Note that later calls overwrite earlier, so the preferred (fastest)
-     * version should be at the end. For instance, we call SSE after 3DNow.
-     */
-    
-    for(i=0; i<eNR_NBKERNEL_NR; i++)
-    {
-        nb_kernel_list[i] = NULL;
-    }
-    
-    if (bGenericKernelOnly)
-    {
-        return;
-    }
-       
-       if(fplog)
+    if(nl->free_energy)
      {
-           fprintf(fplog,"Configuring nonbonded kernels...\n");
+        nl->kernelptr_vf = gmx_nb_free_energy_kernel;
+        nl->kernelptr_f  = gmx_nb_free_energy_kernel;
      }
-       
-    nb_kernel_setup(fplog,nb_kernel_list);
-    
-    if(fr->use_cpu_acceleration==FALSE)
+    else if(!gmx_strcasecmp_min(geom,"CG-CG"))
      {
-        return;
+        nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
+        nl->kernelptr_f  = gmx_nb_generic_cg_kernel;
      }
-
-    /* Setup kernels. The last called setup routine will overwrite earlier assignments,
-        * so we should e.g. test SSE3 support _after_ SSE2 support,
-     * and call e.g. Fortran setup before SSE.
-        */
-    
-#if defined(GMX_FORTRAN) && defined(GMX_DOUBLE)   
-    nb_kernel_setup_f77_double(fplog,nb_kernel_list);
-#endif
-       
-#if defined(GMX_FORTRAN) && !defined(GMX_DOUBLE)   
-    nb_kernel_setup_f77_single(fplog,nb_kernel_list);
-#endif
-       
-#ifdef GMX_BLUEGENE
-    nb_kernel_setup_bluegene(fplog,nb_kernel_list);
-#endif
-       
-#ifdef GMX_POWER6
-    nb_kernel_setup_power6(fplog,nb_kernel_list);
-#endif
-       
-       if(fplog)
+    else
      {
-           fprintf(fplog,"\n\n");
-    }
-}
-
-void
-gmx_setup_adress_kernels(FILE *fplog,gmx_bool bGenericKernelOnly) {
-    int i;
+        /* Try to find a specific kernel first */
  
-    snew(nb_kernel_list_adress, eNR_NBKERNEL_NR);
-
-    for (i = 0; i < eNR_NBKERNEL_NR; i++) {
-        nb_kernel_list_adress[i] = NULL;
-    }
+        for(i=0;i<asize(archs) && nl->kernelptr_vf==NULL ;i++)
+        {
+               nl->kernelptr_vf = nb_kernel_list_findkernel(log,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
+        }
+        for(i=0;i<asize(archs) && nl->kernelptr_f==NULL ;i++)
+        {
+            nl->kernelptr_f  = nb_kernel_list_findkernel(log,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
+            /* If there is not force-only optimized kernel, is there a potential & force one? */
+            if(nl->kernelptr_f == NULL)
+            {
+                nl->kernelptr_f  = nb_kernel_list_findkernel(NULL,archs[i],elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
+            }
+        }
  
-    if (bGenericKernelOnly)
-    {
-        return;
+        /* Give up, pick a generic one instead */
+        if(nl->kernelptr_vf==NULL)
+        {
+            nl->kernelptr_vf = gmx_nb_generic_kernel;
+            nl->kernelptr_f  = gmx_nb_generic_kernel;
+            if(log)
+            {
+                fprintf(log,
+                        "WARNING - Slow generic NB kernel used for neighborlist with\n"
+                        "    Elec: '%s', Modifier: '%s'\n"
+                        "    Vdw:  '%s', Modifier: '%s'\n"
+                        "    Geom: '%s', Other: '%s'\n\n",
+                        elec,elec_mod,vdw,vdw_mod,geom,other);
+            }
+        }
      }
  
-    nb_kernel_setup_adress(fplog, nb_kernel_list_adress);
+    return;
  }
  
  void do_nonbonded(t_commrec *cr,t_forcerec *fr,
-                  rvec x[],rvec f[],t_mdatoms *mdatoms,t_blocka *excl,
-                  real egnb[],real egcoul[],real egpol[],rvec box_size,
+                  rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
+                  gmx_grppairener_t *grppener,rvec box_size,
                    t_nrnb *nrnb,real *lambda, real *dvdl,
                    int nls,int eNL,int flags)
  {
-    gmx_bool            bLR,bDoForces,bForeignLambda;
-       t_nblist *      nlist;
-       real *          fshift;
-       int             n,n0,n1,i,i0,i1,nrnb_ind,sz;
-       t_nblists       *nblists;
-       gmx_bool        bWater;
-       FILE *          fp;
-       int             fac=0;
-       int             nthreads = 1;
-       int             tabletype;
-       int             outeriter,inneriter;
-       real *          tabledata = NULL;
-       gmx_gbdata_t    gbdata;
-
-        nb_kernel_t         *kernelptr=NULL;
-        nb_adress_kernel_t  *adresskernelptr=NULL;
+       t_nblist *        nlist;
+       int               n,n0,n1,i,i0,i1,sz,range;
+       t_nblists *       nblists;
+    nb_kernel_data_t  kernel_data;
+    nb_kernel_t *     kernelptr=NULL;
+    rvec *            f;
      
-        gmx_bool        bCG; /* for AdresS */
-        int             k;/* for AdresS */
-
-    bLR            = (flags & GMX_DONB_LR);
-    bDoForces      = (flags & GMX_DONB_FORCES);
-    bForeignLambda = (flags & GMX_DONB_FOREIGNLAMBDA); 
-
-    bCG = FALSE;  /* for AdresS */
-
-       gbdata.gb_epsilon_solvent = fr->gb_epsilon_solvent;
-       gbdata.epsilon_r = fr->epsilon_r;
-       gbdata.gpol               = egpol;
+    kernel_data.flags                   = flags;
+    kernel_data.exclusions              = excl;
+    kernel_data.lambda                  = lambda;
+    kernel_data.dvdl                    = dvdl;
      
-    if (!fr->adress_type==eAdressOff && !bDoForces){
-        gmx_fatal(FARGS,"No force kernels not implemeted for adress");
-    }
-
-    if(fr->bAllvsAll) 
+    if(fr->bAllvsAll)
      {
-        if(fr->bGB)
-        {
-#if 0 && defined (GMX_X86_SSE2)
-# ifdef GMX_DOUBLE
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsallgb_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                                 &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else
-            {
-                nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                     &outeriter,&inneriter,&fr->AllvsAll_work);        
-            }
-#  else /* not double */
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsallgb_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                                 &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else
-            {
-                nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                     &outeriter,&inneriter,&fr->AllvsAll_work);        
-            }
-#  endif /* double/single alt. */
-#else /* no SSE support compiled in */
-            nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
-                                 &outeriter,&inneriter,&fr->AllvsAll_work);                    
-#endif
-            inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALLGB,inneriter);
-        }
-        else
-        { 
-#if 0 && defined (GMX_X86_SSE2)
-# ifdef GMX_DOUBLE
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsall_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                               &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else 
-            {
-                nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                   &outeriter,&inneriter,&fr->AllvsAll_work);            
-            }
-            
-#  else /* not double */
-            if(fr->use_cpu_acceleration)
-            {
-                nb_kernel_allvsall_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                               &outeriter,&inneriter,&fr->AllvsAll_work);
-            }
-            else 
-            {
-                nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                                   &outeriter,&inneriter,&fr->AllvsAll_work);            
-            }
-
-#  endif /* double/single check */
-#else /* No SSE2 support compiled in */
-            nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
-                               &outeriter,&inneriter,&fr->AllvsAll_work);
-#endif            
-            
-            inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALL,inneriter);
-        }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,outeriter);
          return;
      }
         
-    if (eNL >= 0) 
+    if (eNL >= 0)
      {
                 i0 = eNL;
                 i1 = i0+1;
@@ -390,340 +228,163 @@ void do_nonbonded(t_commrec *cr,t_forcerec *fr,
                 i1 = eNL_NR;
         }
         
-       if (nls >= 0) 
+       if (nls >= 0)
         {
                 n0 = nls;
                 n1 = nls+1;
         }
-       else 
+       else
         {
                 n0 = 0;
                 n1 = fr->nnblists;
         }
-       
-       if(nb_kernel_list == NULL)
-    {
-               gmx_fatal(FARGS,"gmx_setup_kernels has not been called");
-    }
-  
-    fshift = fr->fshift[0];
-  
-       for(n=n0; (n<n1); n++) 
+
+       for(n=n0; (n<n1); n++)
         {
                 nblists = &fr->nblists[n];
-               for(i=i0; (i<i1); i++) 
-               {
-                       outeriter = inneriter = 0;
-      
-                       if (bLR) 
-                       {
-                               nlist = &(nblists->nlist_lr[i]);
-                       }
-                       else
-                       {
-                               nlist = &(nblists->nlist_sr[i]);
-                       }
-                       
-                       if (nlist->nri > 0) 
-                       {
-                               nrnb_ind = nlist->il_code;
-                               
-                               if(nrnb_ind==eNR_NBKERNEL_FREE_ENERGY)
-                               {
-                                       /* generic free energy, use combined table */
-                                       tabledata = nblists->tab.tab;
-                               }
-                               else
-                               {
-                    if (bForeignLambda)
-                    {
-                        /* We don't need the non-perturbed interactions */
-                        continue;
-                    }
  
-                                       tabletype = nb_kernel_table[nrnb_ind];
-                                       
-                                       /* normal kernels, not free energy */
-                                       if (!bDoForces)
-                                       {
-                                               nrnb_ind += eNR_NBKERNEL_NR/2;
-                                       }
-                                       
-                                       if(tabletype == TABLE_COMBINED)
-                                       {
-                                               tabledata = nblists->tab.tab;
-                                       }
-                                       else if(tabletype == TABLE_COUL)
-                                       {
-                                               tabledata = nblists->coultab;
-                                       }
-                                       else if(tabletype == TABLE_VDW)
-                                       {
-                                               tabledata = nblists->vdwtab;
-                                       }
-                                       else
-                                       {
-                                               tabledata = NULL;
-                                       }
-                               }
-                               
-                               nlist->count = 0;
-                               
-                               if(nlist->free_energy)
-                               {
-                                       if(nlist->ivdw==enbvdwBHAM)
-                                       {
-                                               gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
-                                       }
-                                       
-                                       gmx_nb_free_energy_kernel(nlist->icoul,
-                                                                                         nlist->ivdw,
-                                                                                         nlist->nri,
-                                                                                         nlist->iinr,
-                                                                                         nlist->jindex,
-                                                                                         nlist->jjnr,
-                                                                                         nlist->shift,
-                                                                                         fr->shift_vec[0],
-                                                                                         fshift,
-                                                                                         nlist->gid,
-                                                                                         x[0],
-                                                                                         f[0],
-                                                                                         mdatoms->chargeA,
-                                                                                         mdatoms->chargeB,
-                                                                                         fr->epsfac,
-                                                                                         fr->k_rf,
-                                                                                         fr->c_rf,
-                                                                                         fr->ewaldcoeff,
-                                                                                         egcoul,
-                                                                                         mdatoms->typeA,
-                                                                                         mdatoms->typeB,
-                                                                                         fr->ntype,
-                                                                                         fr->nbfp,
-                                                                                         egnb,
-                                                                                         nblists->tab.scale,
-                                                                                         tabledata,
-                                                                                         lambda[efptCOUL],
-                                              lambda[efptVDW],
-                                              dvdl,
-                                              fr->sc_alphacoul,
-                                                                                         fr->sc_alphavdw,
-                                                                                         fr->sc_power,
-                                                                                         fr->sc_r_power,
-                                                                                         fr->sc_sigma6_def,
-                                              fr->sc_sigma6_min,
-                                              bDoForces,
-                                                                                         &outeriter,
-                                                                                         &inneriter);
-                }
-                else if (nlist->enlist == enlistCG_CG)
-                {
-                   if (fr->adress_type==eAdressOff){
-                    /* Call the charge group based inner loop */
-                       gmx_nb_generic_cg_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter);
-                   }
-                   else
-                   {
-                       /*gmx_nb_generic_adress_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter);*/
-                          gmx_fatal(FARGS,"Death & horror! Adress cgcg kernel not implemented anymore.\n");
+        kernel_data.table_elec              = &nblists->table_elec;
+        kernel_data.table_vdw               = &nblists->table_vdw;
+        kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
  
-                   }
+        for(range=0;range<2;range++)
+        {
+            /* Are we doing short/long-range? */
+            if(range==0)
+            {
+                /* Short-range */
+                if(!(flags & GMX_NONBONDED_DO_SR))
+                {
+                    continue;
                  }
-                else
+                kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
+                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
+                kernel_data.energygrp_polarization  = grppener->ener[egGB];
+                nlist = nblists->nlist_sr;
+                f                                   = f_shortrange;
+            }
+            else if(range==1)
+            {
+                /* Long-range */
+                if(!(flags & GMX_NONBONDED_DO_LR))
                  {
-                    /* AdresS*/
-                    /* for adress we need to determine for each energy group wether it is explicit or coarse-grained */
-                    if (!fr->adress_type == eAdressOff) {                        
-                        bCG = FALSE;
-                        if ( !fr->adress_group_explicit[ mdatoms->cENER[nlist->iinr[0]] ] ){
-                            bCG=TRUE;
-                        }
-                        /* If this processor has only explicit atoms (w=1)
-                          skip the coarse grained force calculation. Same for
-                         only coarsegrained atoms and explicit interactions.
-                         Last condition is to make sure that generic kernel is not
-                         skipped*/
-                        if (mdatoms->pureex && bCG && nb_kernel_list[nrnb_ind] != NULL) continue;
-                        if (mdatoms->purecg && !bCG && nb_kernel_list[nrnb_ind] != NULL) continue;
-                    }
-
-                    if (fr->adress_type == eAdressOff ||
-                            mdatoms->pureex ||
-                            mdatoms->purecg){
-                        /* if we only have to calculate pure cg/ex interactions
-                         we can use the faster standard gromacs kernels*/
-                        kernelptr = nb_kernel_list[nrnb_ind];
-                    }else{
-                        /* This processor has hybrid interactions which means
-                         * we have to
-                         * use our own kernels. We have two kernel types: one that
-                         * calculates the forces with the explicit prefactor w1*w2
-                         * and one for coarse-grained with (1-w1*w2)
-                         * explicit kernels are the second part of the kernel
-                         *  list */
-                        if (!bCG) nrnb_ind += eNR_NBKERNEL_NR/2;                      
-                        adresskernelptr = nb_kernel_list_adress[nrnb_ind];
-                    }
-                    
-                    if (kernelptr == NULL && adresskernelptr == NULL)
-                     {
-                        /* Call a generic nonbonded kernel */
-                        
-                        /* If you want to hack/test your own interactions,
-                         * do it in this routine and make sure it is called
-                         * by setting the environment variable GMX_NB_GENERIC.
-                         */
-                        if (fr->adress_type==eAdressOff){
-
-                        gmx_nb_generic_kernel(nlist,
-                                              fr,
-                                              mdatoms,
-                                              x[0],
-                                              f[0],
-                                              fshift,
-                                              egcoul,
-                                              egnb,
-                                              nblists->tab.scale,
-                                              tabledata,
-                                              &outeriter,
-                                              &inneriter);
-                        }else /* do generic AdResS kernels (slow)*/
-                        {
-
-                            gmx_nb_generic_adress_kernel(nlist,
-                                                fr,
-                                                mdatoms,
-                                                x[0],
-                                                f[0],
-                                                fshift,
-                                                egcoul,
-                                                egnb,
-                                                nblists->tab.scale,
-                                                tabledata,
-                                                &outeriter,
-                                                &inneriter,
-                                                bCG);
-                        }
-
+                    continue;
+                }
+                kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
+                kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
+                kernel_data.energygrp_polarization  = grppener->ener[egGB];
+                nlist = nblists->nlist_lr;
+                f                                   = f_longrange;
+            }
  
+            for(i=i0; (i<i1); i++)
+            {
+                if (nlist[i].nri > 0)
+                {
+                    if(flags & GMX_NONBONDED_DO_POTENTIAL)
+                    {
+                        /* Potential and force */
+                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
                      }
                      else
                      {
-                        /* Call nonbonded kernel from function pointer */
-                        if (kernelptr!=NULL){
-                        (*kernelptr)( &(nlist->nri),
-                                      nlist->iinr,
-                                      nlist->jindex,
-                                      nlist->jjnr,
-                                      nlist->shift,
-                                      fr->shift_vec[0],
-                                      fshift,
-                                      nlist->gid,
-                                      x[0],
-                                      f[0],
-                                      mdatoms->chargeA,
-                                      &(fr->epsfac),
-                                      &(fr->k_rf),
-                                      &(fr->c_rf),
-                                      egcoul,
-                                      mdatoms->typeA,
-                                      &(fr->ntype),
-                                      fr->nbfp,
-                                      egnb,
-                                      &(nblists->tab.scale),
-                                      tabledata,
-                                      fr->invsqrta,
-                                      fr->dvda,
-                                      &(fr->gbtabscale),
-                                      fr->gbtab.tab,
-                                      &nthreads,
-                                      &(nlist->count),
-                                      nlist->mtx,
-                                      &outeriter,
-                                      &inneriter,
-                                      (real *)&gbdata);
-                        }else if (adresskernelptr != NULL)
-                        { /* Adress kernels */
-                          (*adresskernelptr)( &(nlist->nri),
-                                      nlist->iinr,
-                                      nlist->jindex,
-                                      nlist->jjnr,
-                                      nlist->shift,
-                                      fr->shift_vec[0],
-                                      fshift,
-                                      nlist->gid,
-                                      x[0],
-                                      f[0],
-                                      mdatoms->chargeA,
-                                      &(fr->epsfac),
-                                      &(fr->k_rf),
-                                      &(fr->c_rf),
-                                      egcoul,
-                                      mdatoms->typeA,
-                                      &(fr->ntype),
-                                      fr->nbfp,
-                                      egnb,
-                                      &(nblists->tab.scale),
-                                      tabledata,
-                                      fr->invsqrta,
-                                      fr->dvda,
-                                      &(fr->gbtabscale),
-                                      fr->gbtab.tab,
-                                      &nthreads,
-                                      &(nlist->count),
-                                      nlist->mtx,
-                                      &outeriter,
-                                      &inneriter,
-                                      fr->adress_ex_forcecap,
-                                      mdatoms->wf);
-                        }
+                        /* Force only, no potential */
+                        kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
                      }
-                }
-                
-                /* Update flop accounting */
-                               
-                               /* Outer loop in kernel */
-                switch (nlist->enlist) {
-                case enlistATOM_ATOM:   fac =  1; break;
-                case enlistSPC_ATOM:    fac =  3; break;
-                case enlistSPC_SPC:     fac =  9; break;
-                case enlistTIP4P_ATOM:  fac =  4; break;
-                case enlistTIP4P_TIP4P: fac = 16; break;
-                case enlistCG_CG:       fac =  1; break;
-                }
-                inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fac*outeriter);
  
-                /* inner loop in kernel */
-                inc_nrnb(nrnb,nrnb_ind,inneriter);
+                    if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
+                    {
+                        /* We don't need the non-perturbed interactions */
+                        continue;
+                    }
+                    (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
+                 }
              }
          }
      }
  }
  
+static void
+nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
+{
+    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
+                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
+                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
+                "a smaller molecule you are decoupling during a free energy calculation.\n"
+                "Since interactions at distances beyond the table cannot be computed,\n"
+                "they are skipped until they are inside the table limit again. You will\n"
+                "only see this message once, even if it occurs for several interactions.\n\n"
+                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
+                "probably something wrong with your system. Only change the table-extension\n"
+                "distance in the mdp file if you are really sure that is the reason.\n",
+                glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
+
+    if (debug)
+    {
+        fprintf(debug,
+                "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
+                x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
+                glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
+    }
+}
+
+
  
-real 
-do_listed_vdw_q(int ftype,int nbonds,
+/* This might logically belong better in the nb_generic.c module, but it is only
+ * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
+ * extra functional call for every single pair listed in the topology.
+ */
+static real
+nb_evaluate_single(real r2, real tabscale,real *vftab,
+                   real qq, real c6, real c12, real *velec, real *vvdw)
+{
+    real       rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
+    int        ntab;
+
+    /* Do the tabulated interactions - first table lookup */
+    rinv             = gmx_invsqrt(r2);
+    r                = r2*rinv;
+    rtab             = r*tabscale;
+    ntab             = rtab;
+    eps              = rtab-ntab;
+    eps2             = eps*eps;
+    ntab             = 12*ntab;
+    /* Electrostatics */
+    Y                = vftab[ntab];
+    F                = vftab[ntab+1];
+    Geps             = eps*vftab[ntab+2];
+    Heps2            = eps2*vftab[ntab+3];
+    Fp               = F+Geps+Heps2;
+    VVe              = Y+eps*Fp;
+    FFe              = Fp+Geps+2.0*Heps2;
+    /* Dispersion */
+    Y                = vftab[ntab+4];
+    F                = vftab[ntab+5];
+    Geps             = eps*vftab[ntab+6];
+    Heps2            = eps2*vftab[ntab+7];
+    Fp               = F+Geps+Heps2;
+    VVd              = Y+eps*Fp;
+    FFd              = Fp+Geps+2.0*Heps2;
+    /* Repulsion */
+    Y                = vftab[ntab+8];
+    F                = vftab[ntab+9];
+    Geps             = eps*vftab[ntab+10];
+    Heps2            = eps2*vftab[ntab+11];
+    Fp               = F+Geps+Heps2;
+    VVr              = Y+eps*Fp;
+    FFr              = Fp+Geps+2.0*Heps2;
+
+    *velec           = qq*VVe;
+    *vvdw            = c6*VVd+c12*VVr;
+
+    fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
+
+    return fscal;
+}
+
+
+real
+do_nonbonded_listed(int ftype,int nbonds,
                  const t_iatom iatoms[],const t_iparams iparams[],
                  const rvec x[],rvec f[],rvec fshift[],
                  const t_pbc *pbc,const t_graph *g,
@@ -732,371 +393,171 @@ do_listed_vdw_q(int ftype,int nbonds,
                  const t_forcerec *fr,gmx_grppairener_t *grppener,
                  int *global_atom_index)
  {
-    static    gmx_bool bWarn=FALSE;
-    real      eps,r2,*tab,rtab2=0;
-    rvec      dx,x14[2],f14[2];
-    int       i,ai,aj,itype;
-    int       typeA[2]={0,0},typeB[2]={0,1};
-    real      chargeA[2]={0,0},chargeB[2];
-    int       gid,shift_vir,shift_f;
-    int       j_index[] = { 0, 1 };
-    int       i0=0,i1=1,i2=2;
-    ivec      dt;
-    int       outeriter,inneriter;
-    int       nthreads = 1;
-    int       count;
-    real      krf,crf,tabscale;
-    int       ntype=0;
-    real      *nbfp=NULL;
-    real      *egnb=NULL,*egcoul=NULL;
-    t_nblist  tmplist;
-    int       icoul,ivdw;
-    gmx_bool      bMolPBC,bFreeEnergy;
+    int              ielec,ivdw;
+    real             qq,c6,c12;
+    rvec             dx;
+    ivec             dt;
+    int              i,j,itype,ai,aj,gid;
+    int              fshift_index;
+    real             r2,rinv;
+    real             fscal,velec,vvdw;
+    real *           energygrp_elec;
+    real *           energygrp_vdw;
+    static gmx_bool  warned_rlimit=FALSE;
+    /* Free energy stuff */
+    gmx_bool         bFreeEnergy;
+    real             LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
+    real             qqB,c6B,c12B,sigma2_def,sigma2_min;
      
-    gmx_bool      bCG; /* AdResS*/
-    real      wf14[2]={0,0}; /* AdResS*/
-   
-#if GMX_THREAD_SHM_FDECOMP
-    pthread_mutex_t mtx;
-#else
-    void *    mtx = NULL;
-#endif
-
      
-#if GMX_THREAD_SHM_FDECOMP
-    pthread_mutex_initialize(&mtx);
-#endif
-
-    bMolPBC = fr->bMolPBC;
-
      switch (ftype) {
-    case F_LJ14:
-    case F_LJC14_Q:
-        eps = fr->epsfac*fr->fudgeQQ;
-        ntype  = 1;
-        egnb   = grppener->ener[egLJ14];
-        egcoul = grppener->ener[egCOUL14];
-        break;
-    case F_LJC_PAIRS_NB:
-        eps = fr->epsfac;
-        ntype  = 1;
-        egnb   = grppener->ener[egLJSR];
-        egcoul = grppener->ener[egCOULSR];
-        break;
-    default:
-        gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",
-                  ftype);
-    }
-    tab = fr->tab14.tab;
-    rtab2 = sqr(fr->tab14.r);
-    tabscale = fr->tab14.scale;
-
-    krf = fr->k_rf;
-    crf = fr->c_rf;
-
-    /* Determine the values for icoul/ivdw. */
-    if (fr->bEwald) {
-        icoul = enbcoulOOR;
-    } 
-    else if(fr->bcoultab)
-    {
-        icoul = enbcoulTAB;
-    }
-    else if(fr->eeltype == eelRF_NEC)
-    {
-        icoul = enbcoulRF;
-    }
-    else 
-    {
-        icoul = enbcoulOOR;
+        case F_LJ14:
+        case F_LJC14_Q:
+            energygrp_elec = grppener->ener[egCOUL14];
+            energygrp_vdw  = grppener->ener[egLJ14];
+            break;
+        case F_LJC_PAIRS_NB:
+            energygrp_elec = grppener->ener[egCOULSR];
+            energygrp_vdw  = grppener->ener[egLJSR];
+            break;
+        default:
+            energygrp_elec = NULL; /* Keep compiler happy */
+            energygrp_vdw  = NULL; /* Keep compiler happy */
+            gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
+            break;
      }
      
-    if(fr->bvdwtab)
+    if(fr->efep != efepNO)
      {
-        ivdw = enbvdwTAB;
+        /* Lambda factor for state A=1-lambda and B=lambda */
+        LFC[0] = 1.0 - lambda[efptCOUL];
+        LFV[0] = 1.0 - lambda[efptVDW];
+        LFC[1] = lambda[efptCOUL];
+        LFV[1] = lambda[efptVDW];
+
+        /*derivative of the lambda factor for state A and B */
+        DLF[0] = -1;
+        DLF[1] = 1;
+
+        /* precalculate */
+        sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
+        sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
+
+        for (i=0;i<2;i++)
+        {
+            lfac_coul[i]  = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
+            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
+            lfac_vdw[i]   = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
+            dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
+        }
      }
      else
      {
-        ivdw = enbvdwLJ;
+        sigma2_min = sigma2_def = 0;
      }
-    
-    
-    bCG = FALSE; /*Adres*/
-    /* We don't do SSE or altivec here, due to large overhead for 4-fold 
-     * unrolling on short lists 
-     */
-    
+
      bFreeEnergy = FALSE;
-    for(i=0; (i<nbonds); ) 
+    for(i=0; (i<nbonds); )
      {
          itype = iatoms[i++];
          ai    = iatoms[i++];
          aj    = iatoms[i++];
          gid   = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
          
-        if (!fr->adress_type == eAdressOff) {
-            if (fr->adress_group_explicit[md->cENER[ai]] != fr->adress_group_explicit[md->cENER[aj]]){
-                /*exclude cg-ex interaction*/
-                continue;
-            }           
-            bCG = !fr->adress_group_explicit[md->cENER[ai]];
-            wf14[0] = md->wf[ai];
-            wf14[1] = md->wf[aj];
-        }
+        /* Get parameters */
          switch (ftype) {
-        case F_LJ14:
-            bFreeEnergy =
+            case F_LJ14:
+                bFreeEnergy =
                  (fr->efep != efepNO &&
                   ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
                    iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
                    iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
-            chargeA[0] = md->chargeA[ai];
-            chargeA[1] = md->chargeA[aj];
-            nbfp = (real *)&(iparams[itype].lj14.c6A);
-            break;
-        case F_LJC14_Q:
-            eps = fr->epsfac*iparams[itype].ljc14.fqq;
-            chargeA[0] = iparams[itype].ljc14.qi;
-            chargeA[1] = iparams[itype].ljc14.qj;
-            nbfp = (real *)&(iparams[itype].ljc14.c6);
-            break;
-        case F_LJC_PAIRS_NB:
-            chargeA[0] = iparams[itype].ljcnb.qi;
-            chargeA[1] = iparams[itype].ljcnb.qj;
-            nbfp = (real *)&(iparams[itype].ljcnb.c6);
-            break;
+                qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
+                c6               = iparams[itype].lj14.c6A;
+                c12              = iparams[itype].lj14.c12A;
+                break;
+            case F_LJC14_Q:
+                qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
+                c6               = iparams[itype].ljc14.c6;
+                c12              = iparams[itype].ljc14.c12;
+                break;
+            case F_LJC_PAIRS_NB:
+                qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
+                c6               = iparams[itype].ljcnb.c6;
+                c12              = iparams[itype].ljcnb.c12;
+                break;
+            default:
+                /* Cannot happen since we called gmx_fatal() above in this case */
+                qq = c6 = c12 = 0; /* Keep compiler happy */
+                break;
          }
          
-        if (!bMolPBC) 
+        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
+         * included in the general nfbp array now. This means the tables are scaled down by the
+         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
+         * be scaled up.
+         */
+        c6  *= 6.0;
+        c12 *= 12.0;
+        
+        /* Do we need to apply full periodic boundary conditions? */
+        if(fr->bMolPBC==TRUE)
          {
-            /* This is a bonded interaction, atoms are in the same box */
-            shift_f = CENTRAL;
-            r2 = distance2(x[ai],x[aj]);
+            fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
          }
-        else 
+        else
          {
-            /* Apply full periodic boundary conditions */
-            shift_f = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
-            r2 = norm2(dx);
+            fshift_index = CENTRAL;
+            rvec_sub(x[ai],x[aj],dx);
          }
+        r2           = norm2(dx);
  
-        if (r2 >= rtab2) 
+        if(r2>=fr->tab14.r*fr->tab14.r)
          {
-            if (!bWarn) 
+            if(warned_rlimit==FALSE)
              {
-                fprintf(stderr,"Warning: 1-4 interaction between %d and %d "
-                        "at distance %.3f which is larger than the 1-4 table size %.3f nm\n", 
-                       glatnr(global_atom_index,ai),
-                       glatnr(global_atom_index,aj),
-                       sqrt(r2), sqrt(rtab2));
-                fprintf(stderr,"These are ignored for the rest of the simulation\n");
-                fprintf(stderr,"This usually means your system is exploding,\n"
-                        "if not, you should increase table-extension in your mdp file\n"
-                        "or with user tables increase the table size\n");
-                bWarn = TRUE;
+                nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
+                warned_rlimit=TRUE;
              }
-            if (debug) 
-             fprintf(debug,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
-                     x[ai][XX],x[ai][YY],x[ai][ZZ],
-                     x[aj][XX],x[aj][YY],x[aj][ZZ],
-                     glatnr(global_atom_index,ai),
-                     glatnr(global_atom_index,aj),
-                     sqrt(r2));
+            continue;
          }
-        else 
+
+        if (bFreeEnergy)
          {
-            copy_rvec(x[ai],x14[0]);
-            copy_rvec(x[aj],x14[1]);
-            clear_rvec(f14[0]);
-            clear_rvec(f14[1]);
-#ifdef DEBUG
-            fprintf(debug,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
-                    md->cENER[ai],md->cENER[aj],md->nenergrp,gid);
-#endif
-            
-           outeriter = inneriter = count = 0;
-           if (bFreeEnergy)
+            /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
+            qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
+            c6B              = iparams[itype].lj14.c6B*6.0;
+            c12B             = iparams[itype].lj14.c12B*12.0;
+
+            fscal            = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
+                                                              fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
+                                                              LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
+                                                              fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
+        }
+        else
          {
-            chargeB[0] = md->chargeB[ai];
-            chargeB[1] = md->chargeB[aj];
-            /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
-             * to the innerloops.
-             * Here we use that the LJ-14 parameters are stored in iparams
-             * as c6A,c12A,c6B,c12B, which are referenced correctly
-             * in the innerloops if we assign type combinations 0-0 and 0-1
-             * to atom pair ai-aj in topologies A and B respectively.
-             */
-
-            /* need to do a bit of a kludge here -- the way it is set up,
-               if the charges change, but the vdw do not, then even though bFreeEnergy is on,
-               it won't work, because all the bonds are perturbed.
-            */
-            if(ivdw==enbvdwBHAM)
-            {
-                gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
-            }
-            count = 0;
-            gmx_nb_free_energy_kernel(icoul,
-                                      ivdw,
-                                      i1,
-                                      &i0,
-                                      j_index,
-                                      &i1,
-                                      &shift_f,
-                                      fr->shift_vec[0],
-                                      fshift[0],
-                                      &gid,
-                                      x14[0],
-                                      f14[0],
-                                      chargeA,
-                                      chargeB,
-                                      eps,
-                                      krf,
-                                      crf,
-                                      fr->ewaldcoeff,
-                                      egcoul,
-                                      typeA,
-                                      typeB,
-                                      ntype,
-                                      nbfp,
-                                      egnb,
-                                      tabscale,
-                                      tab,
-                                      lambda[efptCOUL],
-                                      lambda[efptVDW],
-                                      dvdl,
-                                      fr->sc_alphacoul,
-                                      fr->sc_alphavdw,
-                                      fr->sc_power,
-                                      6.0, /* for 1-4's use the 6 power always - 48 power too high because of where they are forced to be */
-                                      fr->sc_sigma6_def,
-                                      fr->sc_sigma6_min,
-                                      TRUE,
-                                      &outeriter,
-                                      &inneriter);
+            /* Evaluate tabulated interaction without free energy */
+            fscal            = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
          }
-        else 
-        { 
-          if (fr->adress_type==eAdressOff || !fr->adress_do_hybridpairs){
-            /* Not perturbed - call kernel 330 */
-            nb_kernel330
-                ( &i1,
-                  &i0,
-                  j_index,
-                  &i1,
-                  &shift_f,
-                  fr->shift_vec[0],
-                  fshift[0],
-                  &gid,
-                  x14[0],
-                  f14[0],
-                  chargeA,
-                  &eps,
-                  &krf,
-                  &crf,
-                  egcoul,
-                  typeA,
-                  &ntype,
-                  nbfp,
-                  egnb,
-                  &tabscale,
-                  tab,
-                  NULL,
-                  NULL,
-                  NULL,
-                  NULL,
-                  &nthreads,
-                  &count,
-                  (void *)&mtx,
-                  &outeriter,
-                  &inneriter,
-                  NULL);                
-                } else {
-                    if (bCG) {
-                        nb_kernel330_adress_cg(&i1,
-                                &i0,
-                                j_index,
-                                &i1,
-                                &shift_f,
-                                fr->shift_vec[0],
-                                fshift[0],
-                                &gid,
-                                x14[0],
-                                f14[0],
-                                chargeA,
-                                &eps,
-                                &krf,
-                                &crf,
-                                egcoul,
-                                typeA,
-                                &ntype,
-                                nbfp,
-                                egnb,
-                                &tabscale,
-                                tab,
-                                NULL,
-                                NULL,
-                                NULL,
-                                NULL,
-                                &nthreads,
-                                &count,
-                                (void *) &mtx,
-                                &outeriter,
-                                &inneriter,
-                                fr->adress_ex_forcecap,
-                                wf14);
-                    } else {
-                        nb_kernel330_adress_ex(&i1,
-                                &i0,
-                                j_index,
-                                &i1,
-                                &shift_f,
-                                fr->shift_vec[0],
-                                fshift[0],
-                                &gid,
-                                x14[0],
-                                f14[0],
-                                chargeA,
-                                &eps,
-                                &krf,
-                                &crf,
-                                egcoul,
-                                typeA,
-                                &ntype,
-                                nbfp,
-                                egnb,
-                                &tabscale,
-                                tab,
-                                NULL,
-                                NULL,
-                                NULL,
-                                NULL,
-                                &nthreads,
-                                &count,
-                                (void *) &mtx,
-                                &outeriter,
-                                &inneriter,
-                                fr->adress_ex_forcecap,
-                                wf14);
-                    }
  
-                }
-            }
-        
+        energygrp_elec[gid]  += velec;
+        energygrp_vdw[gid]   += vvdw;
+        svmul(fscal,dx,dx);
+
          /* Add the forces */
-        rvec_inc(f[ai],f14[0]);
-        rvec_dec(f[aj],f14[0]);
-        
-        if (g) 
+        rvec_inc(f[ai],dx);
+        rvec_dec(f[aj],dx);
+
+        if (g)
          {
              /* Correct the shift forces using the graph */
-            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);    
-            shift_vir = IVEC2IS(dt);
-            rvec_inc(fshift[shift_vir],f14[0]);
-            rvec_dec(fshift[CENTRAL],f14[0]);
+            ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
+            fshift_index = IVEC2IS(dt);
          }
-        
-           /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */
+        if(fshift_index!=CENTRAL)
+        {
+            rvec_inc(fshift[fshift_index],dx);
+            rvec_dec(fshift[CENTRAL],dx);
          }
      }
      return 0.0;
diff --git a/src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py b/src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py

new file mode 100755 (executable)

index 0000000..46619c6
--- /dev/null
+++ b/src/gmxlib/nonbonded/preprocessor/gmxpreprocess.py
@@ -0,0 +1,963 @@
+#!/usr/bin/env python
+# Copyright (c) 2002-2008 ActiveState Software Inc.
+# License: MIT License (http://www.opensource.org/licenses/mit-license.php)
+# Original filename preprocess.py, see http://code.google.com/p/preprocess/
+#
+# Modified by Erik Lindahl 2009-2012 <lindahl@gromacs.org>
+# to enable advanced preprocessing for Gromacs kernels, including
+# preprocessor for-loops and substitution into preprocessor directives
+# as well as program strings.
+#
+# Please feel free to redistribute under same license as original (MIT),
+# but don't blame the original authors for mistakes in this version.
+#
+
+"""
+    Preprocess a file.
+
+    Command Line Usage:
+        gmxpreprocess [<options>...] <infile>
+
+    Options:
+        -h, --help      Print this help and exit.
+        -V, --version   Print the version info and exit.
+        -v, --verbose   Give verbose output for errors.
+
+        -o <outfile>    Write output to the given file instead of to stdout.
+        -f, --force     Overwrite given output file. (Otherwise an IOError
+                        will be raised if <outfile> already exists.
+        -D <define>     Define a variable for preprocessing. <define>
+                        can simply be a variable name (in which case it
+                        will be true) or it can be of the form
+                        <var>=<val>. An attempt will be made to convert
+                        <val> to an integer so "-D FOO=0" will create a
+                        false value.
+        -I <dir>        Add an directory to the include path for
+                        #include directives.
+
+        -k, --keep-lines    Emit empty lines for preprocessor statement
+                        lines and skipped output lines. This allows line
+                        numbers to stay constant.
+        -s, --no-substitute  Do NOT Substitute defines into emitted lines.
+        -c, --content-types-path <path>
+                        Specify a path to a content.types file to assist
+                        with filetype determination. See the
+                        `_gDefaultContentTypes` string in this file for
+                        details on its format.
+
+    Module Usage:
+        from gmxpreprocess import gmxpreprocess
+        gmxpreprocess(infile, outfile=sys.stdout, defines={}, force=0,
+                      keepLines=0, includePath=[], substitute=1,
+                      contentType=None)
+
+    The <infile> can be marked up with special preprocessor statement lines
+    of the form:
+        <comment-prefix> <preprocessor-statement> <comment-suffix>
+    where the <comment-prefix/suffix> are the native comment delimiters for
+    that file type.
+
+
+    Examples
+    --------
+
+    HTML (*.htm, *.html) or XML (*.xml, *.kpf, *.xul) files:
+
+        <!-- #if FOO -->
+        ...
+        <!-- #endif -->
+
+    Python (*.py), Perl (*.pl), Tcl (*.tcl), Ruby (*.rb), Bash (*.sh),
+    or make ([Mm]akefile*) files:
+
+        # #if defined('FAV_COLOR') and FAV_COLOR == "blue"
+        ...
+        # #elif FAV_COLOR == "red"
+        ...
+        # #else
+        ...
+        # #endif
+
+    C (*.c, *.h), C++ (*.cpp, *.cxx, *.cc, *.h, *.hpp, *.hxx, *.hh),
+    Java (*.java), PHP (*.php) or C# (*.cs) files:
+
+        // #define FAV_COLOR 'blue'
+        ...
+        /* #ifndef FAV_COLOR */
+        ...
+        // #endif
+
+    Fortran 77 (*.f) or 90/95 (*.f90) files:
+
+        C     #if COEFF == 'var'
+              ...
+        C     #endif
+
+    And other languages.
+
+
+    Preprocessor Syntax
+    -------------------
+
+    - Valid statements:
+        #define <var> [<value>]
+        #undef <var>
+        #ifdef <var>
+        #ifndef <var>
+        #if <expr>
+        #elif <expr>
+        #else
+        #endif
+        #error <error string>
+        #include "<file>"
+        #include <var>
+      where <expr> is any valid Python expression.
+    - The expression after #if/elif may be a Python statement. It is an
+      error to refer to a variable that has not been defined by a -D
+      option or by an in-content #define.
+    - Special built-in methods for expressions:
+        defined(varName)    Return true if given variable is defined.
+
+
+    Tips
+    ----
+
+    A suggested file naming convention is to let input files to
+    preprocess be of the form <basename>.p.<ext> and direct the output
+    of preprocess to <basename>.<ext>, e.g.:
+        preprocess -o foo.py foo.p.py
+    The advantage is that other tools (esp. editors) will still
+    recognize the unpreprocessed file as the original language.
+"""
+
+__version_info__ = (1, 1, 0)
+__version__ = '.'.join(map(str, __version_info__))
+
+import os
+import sys
+import getopt
+import types
+import re
+import pprint
+
+
+
+#---- exceptions
+
+class PreprocessError(Exception):
+    def __init__(self, errmsg, file=None, lineno=None, line=None):
+        self.errmsg = str(errmsg)
+        self.file = file
+        self.lineno = lineno
+        self.line = line
+        Exception.__init__(self, errmsg, file, lineno, line)
+    def __str__(self):
+        s = ""
+        if self.file is not None:
+            s += self.file + ":"
+        if self.lineno is not None:
+            s += str(self.lineno) + ":"
+        if self.file is not None or self.lineno is not None:
+            s += " "
+        s += self.errmsg
+        #if self.line is not None:
+        #    s += ": " + self.line
+        return s
+
+
+
+#---- global data
+
+# Comment delimiter info.
+#   A mapping of content type to a list of 2-tuples defining the line
+#   prefix and suffix for a comment. Each prefix or suffix can either
+#   be a string (in which case it is transformed into a pattern allowing
+#   whitespace on either side) or a compiled regex.
+_commentGroups = {
+    "Python":     [ ('#', '') ],
+    "Perl":       [ ('#', '') ],
+    "PHP":        [ ('/*', '*/'), ('//', ''), ('#', '') ],
+    "Ruby":       [ ('#', '') ],
+    "Tcl":        [ ('#', '') ],
+    "Shell":      [ ('#', '') ],
+    # Allowing for CSS and JavaScript comments in XML/HTML.
+    "XML":        [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
+    "HTML":       [ ('<!--', '-->'), ('/*', '*/'), ('//', '') ],
+    "Makefile":   [ ('#', '') ],
+    "JavaScript": [ ('/*', '*/'), ('//', '') ],
+    "CSS":        [ ('/*', '*/') ],
+    "C":          [ ('/*', '*/') ],
+    "C++":        [ ('/*', '*/'), ('//', '') ],
+    "Java":       [ ('/*', '*/'), ('//', '') ],
+    "C#":         [ ('/*', '*/'), ('//', '') ],
+    "IDL":        [ ('/*', '*/'), ('//', '') ],
+    "Text":       [ ('#', '') ],
+    "Fortran":    [ (re.compile(r'^[a-zA-Z*$]\s*'), ''), ('!', '') ],
+    "TeX":        [ ('%', '') ],
+}
+
+
+
+#---- internal logging facility
+
+class _Logger:
+    DEBUG, INFO, WARN, ERROR, CRITICAL = range(5)
+    def __init__(self, name, level=None, streamOrFileName=sys.stderr):
+        self._name = name
+        if level is None:
+            self.level = self.WARN
+        else:
+            self.level = level
+        if type(streamOrFileName) == types.StringType:
+            self.stream = open(streamOrFileName, 'w')
+            self._opennedStream = 1
+        else:
+            self.stream = streamOrFileName
+            self._opennedStream = 0
+    def __del__(self):
+        if self._opennedStream:
+            self.stream.close()
+    def getLevel(self):
+        return self.level
+    def setLevel(self, level):
+        self.level = level
+    def _getLevelName(self, level):
+        levelNameMap = {
+            self.DEBUG: "DEBUG",
+            self.INFO: "INFO",
+            self.WARN: "WARN",
+            self.ERROR: "ERROR",
+            self.CRITICAL: "CRITICAL",
+        }
+        return levelNameMap[level]
+    def isEnabled(self, level):
+        return level >= self.level
+    def isDebugEnabled(self): return self.isEnabled(self.DEBUG)
+    def isInfoEnabled(self): return self.isEnabled(self.INFO)
+    def isWarnEnabled(self): return self.isEnabled(self.WARN)
+    def isErrorEnabled(self): return self.isEnabled(self.ERROR)
+    def isFatalEnabled(self): return self.isEnabled(self.FATAL)
+    def log(self, level, msg, *args):
+        if level < self.level:
+            return
+        message = "%s: %s: " % (self._name, self._getLevelName(level).lower())
+        message = message + (msg % args) + "\n"
+        self.stream.write(message)
+        self.stream.flush()
+    def debug(self, msg, *args):
+        self.log(self.DEBUG, msg, *args)
+    def info(self, msg, *args):
+        self.log(self.INFO, msg, *args)
+    def warn(self, msg, *args):
+        self.log(self.WARN, msg, *args)
+    def error(self, msg, *args):
+        self.log(self.ERROR, msg, *args)
+    def fatal(self, msg, *args):
+        self.log(self.CRITICAL, msg, *args)
+
+log = _Logger("gmxpreprocess", _Logger.WARN)
+
+
+
+#---- internal support stuff
+
+def SubstituteInternal(expr, defines):
+    prevexpr = ''
+    while (expr!=prevexpr):
+        prevexpr=expr
+        for name in reversed(sorted(defines, key=len)):
+            value = defines[name]
+            expr = expr.replace(name, str(value))
+    return expr
+
+def SubstituteInCode(expr, defines):
+    prevexpr = ''
+    while (expr!=prevexpr):
+        prevexpr=expr
+        for name in reversed(sorted(defines, key=len)):
+            value = defines[name]
+            expr = expr.replace('{' + name + '}', str(value))
+    return expr
+
+
+def _evaluate(expr, defines):
+    """Evaluate the given expression string with the given context.
+
+    WARNING: This runs eval() on a user string. This is unsafe.
+    """
+    #interpolated = _interpolate(s, defines)
+
+    try:
+        rv = eval(expr, {'defined':lambda v: v in defines}, defines)
+    except Exception, ex:
+        msg = str(ex)
+        if msg.startswith("name '") and msg.endswith("' is not defined"):
+            # A common error (at least this is presumed:) is to have
+            #   defined(FOO)   instead of   defined('FOO')
+            # We should give a little as to what might be wrong.
+            # msg == "name 'FOO' is not defined"  -->  varName == "FOO"
+            varName = msg[len("name '"):-len("' is not defined")]
+            if expr.find("defined(%s)" % varName) != -1:
+                # "defined(FOO)" in expr instead of "defined('FOO')"
+                msg += " (perhaps you want \"defined('%s')\" instead of "\
+                       "\"defined(%s)\")" % (varName, varName)
+        elif msg.startswith("invalid syntax"):
+            msg = "invalid syntax: '%s'" % expr
+        raise PreprocessError(msg, defines['__FILE__'], defines['__LINE__'])
+    log.debug("evaluate %r -> %s (defines=%r)", expr, rv, defines)
+
+    return rv
+
+#---- module API
+
+def gmxpreprocess(infile, outfile=sys.stdout, defines={},
+               force=0, keepLines=0, includePath=[], substitute=1,
+               contentType=None, contentTypesRegistry=None,
+               __preprocessedFiles=None):
+    """Preprocess the given file.
+
+    "infile" is the input path.
+    "outfile" is the output path or stream (default is sys.stdout).
+    "defines" is a dictionary of defined variables that will be
+        understood in preprocessor statements. Keys must be strings and,
+        currently, only the truth value of any key's value matters.
+    "force" will overwrite the given outfile if it already exists. Otherwise
+        an IOError will be raise if the outfile already exists.
+    "keepLines" will cause blank lines to be emitted for preprocessor lines
+        and content lines that would otherwise be skipped.
+    "includePath" is a list of directories to search for given #include
+        directives. The directory of the file being processed is presumed.
+    "substitute", if true, will allow substitution of defines into emitted
+        lines. (NOTE: This substitution will happen within program strings
+        as well. This may not be what you expect.)
+    "contentType" can be used to specify the content type of the input
+        file. It not given, it will be guessed.
+    "contentTypesRegistry" is an instance of ContentTypesRegistry. If not specified
+        a default registry will be created.
+    "__preprocessedFiles" (for internal use only) is used to ensure files
+        are not recusively preprocessed.
+
+    Returns the modified dictionary of defines or raises PreprocessError if
+    there was some problem.
+    """
+    if __preprocessedFiles is None:
+        __preprocessedFiles = []
+    log.info("preprocess(infile=%r, outfile=%r, defines=%r, force=%r, "\
+             "keepLines=%r, includePath=%r, contentType=%r, "\
+             "__preprocessedFiles=%r)", infile, outfile, defines, force,
+             keepLines, includePath, contentType, __preprocessedFiles)
+    absInfile = os.path.normpath(os.path.abspath(infile))
+    if absInfile in __preprocessedFiles:
+        raise PreprocessError("detected recursive #include of '%s'"\
+                              % infile)
+    __preprocessedFiles.append(os.path.abspath(infile))
+
+    # Determine the content type and comment info for the input file.
+    if contentType is None:
+        registry = contentTypesRegistry or getDefaultContentTypesRegistry()
+        contentType = registry.getContentType(infile)
+        if contentType is None:
+            contentType = "Text"
+            log.warn("defaulting content type for '%s' to '%s'",
+                     infile, contentType)
+    try:
+        cgs = _commentGroups[contentType]
+    except KeyError:
+        raise PreprocessError("don't know comment delimiters for content "\
+                              "type '%s' (file '%s')"\
+                              % (contentType, infile))
+
+    # Generate statement parsing regexes. Basic format:
+    #       <comment-prefix> <preprocessor-stmt> <comment-suffix>
+    #  Examples:
+    #       <!-- #if foo -->
+    #       ...
+    #       <!-- #endif -->
+    #
+    #       # #if BAR
+    #       ...
+    #       # #else
+    #       ...
+    #       # #endif
+    stmts = ['##\s*(?P<op>.*?)',
+             '#\s*(?P<op>if|elif|ifdef|ifndef)\s+(?P<expr>.*?)',
+             '#\s*(?P<op>else|endif)',
+             '#\s*(?P<op>error)\s+(?P<error>.*?)',
+             '#\s*(?P<op>define)\s+(?P<var>[^\s]*?)(\s+(?P<val>.+?))?',
+             '#\s*(?P<op>undef)\s+(?P<var>[^\s]*?)',
+             '#\s*(?P<op>for)\s+(?P<var>.*?)\s+((in)|(IN))\s+(?P<valuelist>.*?)',
+             '#\s*(?P<op>endfor)',
+             '#\s*(?P<op>include)\s+"(?P<fname>.*?)"',
+             r'#\s*(?P<op>include)\s+(?P<var>[^\s]+?)',
+            ]
+    patterns = []
+    for stmt in stmts:
+        # The comment group prefix and suffix can either be just a
+        # string or a compiled regex.
+        for cprefix, csuffix in cgs:
+            if hasattr(cprefix, "pattern"):
+                pattern = cprefix.pattern
+            else:
+                pattern = r"^\s*%s\s*" % re.escape(cprefix)
+            pattern += stmt
+            if hasattr(csuffix, "pattern"):
+                pattern += csuffix.pattern
+            else:
+                pattern += r"\s*%s\s*$" % re.escape(csuffix)
+            patterns.append(pattern)
+    stmtRes = [re.compile(p) for p in patterns]
+
+    # Process the input file.
+    # (Would be helpful if I knew anything about lexing and parsing
+    # simple grammars.)
+    fin = open(infile, 'r')
+    lines = fin.readlines()
+    # Merge multi-line comments
+    for i in range(len(lines)-1,-1,-1):
+        line = lines[i].rstrip(' \r\n')
+        if len(line)>0 and line[-1]=='\\':
+            lines[i] = line[:-1] + ' ' + lines[i+1]
+            lines[i+1] = ''    # keep an empty line to avoid screwing up line numbers
+
+    fin.close()
+    if type(outfile) in types.StringTypes:
+        if force and os.path.exists(outfile):
+            os.chmod(outfile, 0777)
+            os.remove(outfile)
+        fout = open(outfile, 'w')
+    else:
+        fout = outfile
+
+    defines['__FILE__'] = infile
+    SKIP, EMIT = range(2) # states
+    states = [(EMIT,   # a state is (<emit-or-skip-lines-in-this-section>,
+               0,      #             <have-emitted-in-this-if-block>,
+               0)]     #             <have-seen-'else'-in-this-if-block>)
+    lineNum = 0
+    nlines = len(lines)
+    forlevel = 0
+    forvar = {}
+    forvaluelist = {}
+    forstartline = {}
+    foriteration = {}
+    last_emitted_was_blank = True
+
+    while lineNum<nlines:
+
+        line = lines[lineNum]
+
+        log.debug("line %d: %r", lineNum+1, line)
+        defines['__LINE__'] = lineNum+1
+
+        # Is this line a preprocessor stmt line?
+        #XXX Could probably speed this up by optimizing common case of
+        #    line NOT being a preprocessor stmt line.
+        for stmtRe in stmtRes:
+            match = stmtRe.match(line)
+            if match:
+                break
+        else:
+            match = None
+
+        if match:
+
+            # Remove contents after ## (comment)
+            idx=line.find("##")
+            if(idx>0):
+                line = line[0:idx]
+
+            op = match.group("op")
+            log.debug("%r stmt (states: %r)", op, states)
+            if op == "define":
+                if not (states and states[-1][0] == SKIP):
+                    var, val = match.group("var", "val")
+                    val = SubstituteInternal(str(val), defines)
+
+                    if val is None:
+                        val = None
+                    else:
+                        try:
+                            val = eval(val, {}, {})
+                        except:
+                            pass
+                    defines[var] = val
+            elif op == "undef":
+                if not (states and states[-1][0] == SKIP):
+                    var = match.group("var")
+                    try:
+                        del defines[var]
+                    except KeyError:
+                        pass
+            elif op == "include":
+                if not (states and states[-1][0] == SKIP):
+                    if "var" in match.groupdict():
+                        # This is the second include form: #include VAR
+                        var = match.group("var")
+                        f = defines[var]
+                    else:
+                        # This is the first include form: #include "path"
+                        f = match.group("fname")
+
+                    for d in [os.path.dirname(infile)] + includePath:
+                        fname = os.path.normpath(os.path.join(d, f))
+                        if os.path.exists(fname):
+                            break
+                    else:
+                        raise PreprocessError("could not find #include'd file "\
+                                              "\"%s\" on include path: %r"\
+                                              % (f, includePath))
+                    defines = gmxpreprocess(fname, fout, defines, force,
+                                            keepLines, includePath, substitute,
+                                            contentTypesRegistry=contentTypesRegistry,
+                                            __preprocessedFiles=__preprocessedFiles)
+            elif op in ("if", "ifdef", "ifndef"):
+                if op == "if":
+                    expr = match.group("expr")
+                elif op == "ifdef":
+                    expr = "defined('%s')" % match.group("expr")
+                elif op == "ifndef":
+                    expr = "not defined('%s')" % match.group("expr")
+                try:
+                    if states and states[-1][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states.append((SKIP, 0, 0))
+                    elif _evaluate(expr, defines):
+                        states.append((EMIT, 1, 0))
+                    else:
+                        states.append((SKIP, 0, 0))
+                except KeyError:
+                    raise PreprocessError("use of undefined variable in "\
+                                          "#%s stmt" % op, defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "elif":
+                expr = match.group("expr")
+                try:
+                    if states[-1][2]: # already had #else in this if-block
+                        raise PreprocessError("illegal #elif after #else in "\
+                            "same #if block", defines['__FILE__'],
+                            defines['__LINE__'], line)
+                    elif states[-1][1]: # if have emitted in this if-block
+                        states[-1] = (SKIP, 1, 0)
+                    elif states[:-1] and states[-2][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states[-1] = (SKIP, 0, 0)
+                    elif _evaluate(expr, defines):
+                        states[-1] = (EMIT, 1, 0)
+                    else:
+                        states[-1] = (SKIP, 0, 0)
+                except IndexError:
+                    raise PreprocessError("#elif stmt without leading #if "\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "else":
+                try:
+                    if states[-1][2]: # already had #else in this if-block
+                        raise PreprocessError("illegal #else after #else in "\
+                            "same #if block", defines['__FILE__'],
+                            defines['__LINE__'], line)
+                    elif states[-1][1]: # if have emitted in this if-block
+                        states[-1] = (SKIP, 1, 1)
+                    elif states[:-1] and states[-2][0] == SKIP:
+                        # Were are nested in a SKIP-portion of an if-block.
+                        states[-1] = (SKIP, 0, 1)
+                    else:
+                        states[-1] = (EMIT, 1, 1)
+                except IndexError:
+                    raise PreprocessError("#else stmt without leading #if "\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "endif":
+                try:
+                    states.pop()
+                except IndexError:
+                    raise PreprocessError("#endif stmt without leading #if"\
+                                          "stmt", defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            elif op == "for":
+
+                tmpstr     = match.group("var")
+                thisforvar = tmpstr.split(",")
+                for s in thisforvar:
+                    s.strip(" ")
+
+                # Thisforvar is now a _list_ if 1 or more for variables, without whitespace
+
+                # Evaluate the list-of-values just in case it refers to a list variable
+                valuelist = _evaluate(match.group("valuelist"),defines)
+                # If a string, evaluate it again
+                if(isinstance(valuelist,str)):
+                    valuelist = eval(valuelist)
+
+                forlevel += 1
+
+                forvar[forlevel]       = thisforvar
+                forvaluelist[forlevel] = valuelist
+                forstartline[forlevel] = lineNum + 1
+                foriteration[forlevel] = 0
+
+                if(len(valuelist)>0):
+                    # set the variable for this for-loop to the first value in the list for this level
+                    nvar=len(thisforvar)
+                    for i in range(nvar):
+                        if(nvar==1):
+                            val=valuelist[0]
+                        else:
+                            val=valuelist[0][i]
+                        defines[thisforvar[i]] = val
+
+                else:
+                    # list was empty, so skip this entire section
+                    states.append((SKIP, 0, 0))
+
+            elif op == "endfor":
+                foriteration[forlevel] += 1
+                # Should we do one more iteration on this level?
+                iter       = foriteration[forlevel]
+                thisforvar = forvar[forlevel]
+                valuelist  = forvaluelist[forlevel]
+
+                if(iter<len(valuelist)):
+
+                    nvar = len(thisforvar)
+                    for i in range(len(thisforvar)):
+                        if(nvar==1):
+                           val=valuelist[iter]
+                        else:
+                            val=valuelist[iter][i]
+                        defines[thisforvar[i]] = val
+
+                    lineNum             = forstartline[forlevel]
+                    continue
+                else:
+                    forlevel -= 1
+                    if(len(valuelist)==0):
+                        states.pop()
+
+            elif op == "error":
+                if not (states and states[-1][0] == SKIP):
+                    error = match.group("error")
+                    raise PreprocessError("#error: "+error, defines['__FILE__'],
+                                          defines['__LINE__'], line)
+            log.debug("states: %r", states)
+            if keepLines:
+                fout.write("\n")
+        else:
+            try:
+                if states[-1][0] == EMIT:
+                    log.debug("emit line (%s)" % states[-1][1])
+                    # Substitute all defines into line.
+                    # XXX Should avoid recursive substitutions. But that
+                    #     would be a pain right now.
+
+                    sline = line
+                    if substitute:
+                        sline = SubstituteInCode(sline,defines)
+
+                    emitted_line_is_blank = (sline.strip()=='')
+                    if( not (emitted_line_is_blank and last_emitted_was_blank) and not keepLines):
+                        fout.write(sline)
+                        last_emitted_was_blank = emitted_line_is_blank
+
+                elif keepLines:
+                    log.debug("keep blank line (%s)" % states[-1][1])
+                    fout.write("\n")
+                else:
+                    log.debug("skip line (%s)" % states[-1][1])
+            except IndexError:
+                raise PreprocessError("superfluous #endif before this line",
+                                      defines['__FILE__'],
+                                      defines['__LINE__'])
+        lineNum += 1
+
+    if len(states) > 1:
+        raise PreprocessError("unterminated #if block", defines['__FILE__'],
+                              defines['__LINE__'])
+    elif len(states) < 1:
+        raise PreprocessError("superfluous #endif on or before this line",
+                              defines['__FILE__'], defines['__LINE__'])
+
+    if fout != outfile:
+        fout.close()
+
+    return defines
+
+
+#---- content-type handling
+
+_gDefaultContentTypes = """
+    # Default file types understood by "gmxpreprocess.py".
+    #
+    # Format is an extension of 'mime.types' file syntax.
+    #   - '#' indicates a comment to the end of the line.
+    #   - a line is:
+    #       <filetype> [<pattern>...]
+    #     where,
+    #       <filetype>'s are equivalent in spirit to the names used in the Windows
+    #           registry in HKCR, but some of those names suck or are inconsistent;
+    #           and
+    #       <pattern> is a suffix (pattern starts with a '.'), a regular expression
+    #           (pattern is enclosed in '/' characters), a full filename (anything
+    #           else).
+    #
+    # Notes on case-sensitivity:
+    #
+    # A suffix pattern is case-insensitive on Windows and case-sensitive
+    # elsewhere.  A filename pattern is case-sensitive everywhere. A regex
+    # pattern's case-sensitivity is defined by the regex. This means it is by
+    # default case-sensitive, but this can be changed using Python's inline
+    # regex option syntax. E.g.:
+    #         Makefile            /^(?i)makefile.*$/   # case-INsensitive regex
+
+    Python              .py
+    Python              .pyw
+    Perl                .pl
+    Ruby                .rb
+    Tcl                 .tcl
+    XML                 .xml
+    XML                 .kpf
+    XML                 .xul
+    XML                 .rdf
+    XML                 .xslt
+    XML                 .xsl
+    XML                 .wxs
+    XML                 .wxi
+    HTML                .htm
+    HTML                .html
+    XML                 .xhtml
+    Makefile            /^[Mm]akefile.*$/
+    PHP                 .php
+    JavaScript          .js
+    CSS                 .css
+    C++                 .c       # C++ because then we can use //-style comments
+    C++                 .cpp
+    C++                 .cxx
+    C++                 .cc
+    C++                 .h
+    C++                 .hpp
+    C++                 .hxx
+    C++                 .hh
+    C++                 .gpp     # Gromacs pre-preprocessing
+    IDL                 .idl
+    Text                .txt
+    Fortran             .f
+    Fortran             .f90
+    Shell               .sh
+    Shell               .csh
+    Shell               .ksh
+    Shell               .zsh
+    Java                .java
+    C#                  .cs
+    TeX                 .tex
+
+    # Some Komodo-specific file extensions
+    Python              .ksf  # Fonts & Colors scheme files
+    Text                .kkf  # Keybinding schemes files
+"""
+
+class ContentTypesRegistry:
+    """A class that handles determining the filetype of a given path.
+
+    Usage:
+        >>> registry = ContentTypesRegistry()
+        >>> registry.getContentType("foo.py")
+        "Python"
+    """
+
+    def __init__(self, contentTypesPaths=None):
+        self.contentTypesPaths = contentTypesPaths
+        self._load()
+
+    def _load(self):
+        from os.path import dirname, join, exists
+
+        self.suffixMap = {}
+        self.regexMap = {}
+        self.filenameMap = {}
+
+        self._loadContentType(_gDefaultContentTypes)
+        localContentTypesPath = join(dirname(__file__), "content.types")
+        if exists(localContentTypesPath):
+            log.debug("load content types file: `%r'" % localContentTypesPath)
+            self._loadContentType(open(localContentTypesPath, 'r').read())
+        for path in (self.contentTypesPaths or []):
+            log.debug("load content types file: `%r'" % path)
+            self._loadContentType(open(path, 'r').read())
+
+    def _loadContentType(self, content, path=None):
+        """Return the registry for the given content.types file.
+
+        The registry is three mappings:
+            <suffix> -> <content type>
+            <regex> -> <content type>
+            <filename> -> <content type>
+        """
+        for line in content.splitlines(0):
+            words = line.strip().split()
+            for i in range(len(words)):
+                if words[i][0] == '#':
+                    del words[i:]
+                    break
+            if not words: continue
+            contentType, patterns = words[0], words[1:]
+            if not patterns:
+                if line[-1] == '\n': line = line[:-1]
+                raise PreprocessError("bogus content.types line, there must "\
+                                      "be one or more patterns: '%s'" % line)
+            for pattern in patterns:
+                if pattern.startswith('.'):
+                    if sys.platform.startswith("win"):
+                        # Suffix patterns are case-insensitive on Windows.
+                        pattern = pattern.lower()
+                    self.suffixMap[pattern] = contentType
+                elif pattern.startswith('/') and pattern.endswith('/'):
+                    self.regexMap[re.compile(pattern[1:-1])] = contentType
+                else:
+                    self.filenameMap[pattern] = contentType
+
+    def getContentType(self, path):
+        """Return a content type for the given path.
+
+        @param path {str} The path of file for which to guess the
+            content type.
+        @returns {str|None} Returns None if could not determine the
+            content type.
+        """
+        basename = os.path.basename(path)
+        contentType = None
+        # Try to determine from the path.
+        if not contentType and self.filenameMap.has_key(basename):
+            contentType = self.filenameMap[basename]
+            log.debug("Content type of '%s' is '%s' (determined from full "\
+                      "path).", path, contentType)
+        # Try to determine from the suffix.
+        if not contentType and '.' in basename:
+            suffix = "." + basename.split(".")[-1]
+            if sys.platform.startswith("win"):
+                # Suffix patterns are case-insensitive on Windows.
+                suffix = suffix.lower()
+            if self.suffixMap.has_key(suffix):
+                contentType = self.suffixMap[suffix]
+                log.debug("Content type of '%s' is '%s' (determined from "\
+                          "suffix '%s').", path, contentType, suffix)
+        # Try to determine from the registered set of regex patterns.
+        if not contentType:
+            for regex, ctype in self.regexMap.items():
+                if regex.search(basename):
+                    contentType = ctype
+                    log.debug("Content type of '%s' is '%s' (matches regex '%s')",
+                              path, contentType, regex.pattern)
+                    break
+        # Try to determine from the file contents.
+        content = open(path, 'rb').read()
+        if content.startswith("<?xml"):  # cheap XML sniffing
+            contentType = "XML"
+        return contentType
+
+_gDefaultContentTypesRegistry = None
+def getDefaultContentTypesRegistry():
+    global _gDefaultContentTypesRegistry
+    if _gDefaultContentTypesRegistry is None:
+        _gDefaultContentTypesRegistry = ContentTypesRegistry()
+    return _gDefaultContentTypesRegistry
+
+
+#---- internal support stuff
+#TODO: move other internal stuff down to this section
+
+try:
+    reversed
+except NameError:
+    # 'reversed' added in Python 2.4 (http://www.python.org/doc/2.4/whatsnew/node7.html)
+    def reversed(seq):
+        rseq = list(seq)
+        rseq.reverse()
+        for item in rseq:
+            yield item
+try:
+    sorted
+except NameError:
+    # 'sorted' added in Python 2.4. Note that I'm only implementing enough
+    # of sorted as is used in this module.
+    def sorted(seq, key=None):
+        identity = lambda x: x
+        key_func = (key or identity)
+        sseq = list(seq)
+        sseq.sort(lambda self, other: cmp(key_func(self), key_func(other)))
+        for item in sseq:
+            yield item
+
+
+#---- mainline
+
+def main(argv):
+    try:
+        optlist, args = getopt.getopt(argv[1:], 'hVvo:D:fkI:sc:',
+            ['help', 'version', 'verbose', 'force', 'keep-lines',
+             'no-substitute', 'content-types-path='])
+    except getopt.GetoptError, msg:
+        sys.stderr.write("gmxpreprocess: error: %s. Your invocation was: %s\n"\
+                         % (msg, argv))
+        sys.stderr.write("See 'gmxpreprocess --help'.\n")
+        return 1
+    outfile = sys.stdout
+    defines = {}
+    force = 0
+    keepLines = 0
+    substitute = 1
+    includePath = []
+    contentTypesPaths = []
+    for opt, optarg in optlist:
+        if opt in ('-h', '--help'):
+            sys.stdout.write(__doc__)
+            return 0
+        elif opt in ('-V', '--version'):
+            sys.stdout.write("gmxpreprocess %s\n" % __version__)
+            return 0
+        elif opt in ('-v', '--verbose'):
+            log.setLevel(log.DEBUG)
+        elif opt == '-o':
+            outfile = optarg
+        if opt in ('-f', '--force'):
+            force = 1
+        elif opt == '-D':
+            if optarg.find('=') != -1:
+                var, val = optarg.split('=', 1)
+                try:
+                    val = int(val)
+                except ValueError:
+                    pass
+            else:
+                var, val = optarg, None
+            defines[var] = val
+        elif opt in ('-k', '--keep-lines'):
+            keepLines = 1
+        elif opt == '-I':
+            includePath.append(optarg)
+        elif opt in ('-s', '--no-substitute'):
+            substitute = 0
+        elif opt in ('-c', '--content-types-path'):
+            contentTypesPaths.append(optarg)
+
+    if len(args) != 1:
+        sys.stderr.write("gmxpreprocess: error: incorrect number of "\
+                         "arguments: argv=%r\n" % argv)
+        return 1
+    else:
+        infile = args[0]
+
+    try:
+        contentTypesRegistry = ContentTypesRegistry(contentTypesPaths)
+        gmxpreprocess(infile, outfile, defines, force, keepLines, includePath,
+                   substitute, contentTypesRegistry=contentTypesRegistry)
+    except PreprocessError, ex:
+        if log.isDebugEnabled():
+            import traceback
+            traceback.print_exc(file=sys.stderr)
+        else:
+            sys.stderr.write("gmxpreprocess: error: %s\n" % str(ex))
+        return 1
+
+if __name__ == "__main__":
+    __file__ = sys.argv[0]
+    sys.exit( main(sys.argv) )
+
diff --git a/src/gmxlib/nrnb.c b/src/gmxlib/nrnb.c

index 5bc7155ae7ad07418c390c5d5dbbf8758b796c1a..3a3e5d5e226144ca3894148d3b313b4e6bdc6ba5 100644 (file)
--- a/src/gmxlib/nrnb.c
+++ b/src/gmxlib/nrnb.c
@@ -47,6 +47,10 @@
  #include "smalloc.h"
  #include "copyrite.h"
  
+
+
+
+
  typedef struct {
    const char *name;
    int  flop;
@@ -54,142 +58,37 @@ typedef struct {
  
  
  static const t_nrnb_data nbdata[eNRNB] = {
-    { "LJ",                             33 }, /* nb_kernel010 */
-    { "Buckingham",                     61 }, /* nb_kernel020 */ 
-    { "VdW(T)",                         54 }, /* nb_kernel030 */
-    { "Coulomb",                        27 }, /* nb_kernel100 */
-    { "Coulomb [W3]",                   80 }, /* nb_kernel101 */
-    { "Coulomb [W3-W3]",               234 }, /* nb_kernel102 */
-    { "Coulomb [W4]",                   80 }, /* nb_kernel103 */
-    { "Coulomb [W4-W4]",               234 }, /* nb_kernel104 */
-    { "Coulomb + LJ",                   38 }, /* nb_kernel110 */
-    { "Coulomb + LJ [W3]",              91 }, /* nb_kernel111 */
-    { "Coulomb + LJ [W3-W3]",          245 }, /* nb_kernel112 */
-    { "Coulomb + LJ [W4]",             113 }, /* nb_kernel113 */
-    { "Coulomb + LJ [W4-W4]",          267 }, /* nb_kernel114 */
-    { "Coulomb + Bham ",                64 }, /* nb_kernel120 */
-    { "Coulomb + Bham [W3]",           117 }, /* nb_kernel121 */
-    { "Coulomb + Bham [W3-W3]",        271 }, /* nb_kernel122 */
-    { "Coulomb + Bham [W4]",           141 }, /* nb_kernel123 */
-    { "Coulomb + Bham [W4-W4]",        295 }, /* nb_kernel124 */
-    { "Coulomb + VdW(T) ",              59 }, /* nb_kernel130 */
-    { "Coulomb + VdW(T) [W3]",         112 }, /* nb_kernel131 */
-    { "Coulomb + VdW(T) [W3-W3]",      266 }, /* nb_kernel132 */
-    { "Coulomb + VdW(T) [W4]",         134 }, /* nb_kernel133 */
-    { "Coulomb + VdW(T) [W4-W4]",      288 }, /* nb_kernel134 */
-    { "RF Coul",                        33 }, /* nb_kernel200 */
-    { "RF Coul [W3]",                   98 }, /* nb_kernel201 */
-    { "RF Coul [W3-W3]",               288 }, /* nb_kernel202 */
-    { "RF Coul [W4]",                   98 }, /* nb_kernel203 */
-    { "RF Coul [W4-W4]",               288 }, /* nb_kernel204 */
-    { "RF Coul + LJ",                   44 }, /* nb_kernel210 */
-    { "RF Coul + LJ [W3]",             109 }, /* nb_kernel211 */
-    { "RF Coul + LJ [W3-W3]",          299 }, /* nb_kernel212 */
-    { "RF Coul + LJ [W4]",             131 }, /* nb_kernel213 */
-    { "RF Coul + LJ [W4-W4]",          321 }, /* nb_kernel214 */
-    { "RF Coul + Bham ",                70 }, /* nb_kernel220 */
-    { "RF Coul + Bham [W3]",           135 }, /* nb_kernel221 */
-    { "RF Coul + Bham [W3-W3]",        325 }, /* nb_kernel222 */
-    { "RF Coul + Bham [W4]",           159 }, /* nb_kernel223 */
-    { "RF Coul + Bham [W4-W4]",        349 }, /* nb_kernel224 */
-    { "RF Coul + VdW(T) ",              65 }, /* nb_kernel230 */
-    { "RF Coul + VdW(T) [W3]",         130 }, /* nb_kernel231 */
-    { "RF Coul + VdW(T) [W3-W3]",      320 }, /* nb_kernel232 */
-    { "RF Coul + VdW(T) [W4]",         152 }, /* nb_kernel233 */
-    { "RF Coul + VdW(T) [W4-W4]",      342 }, /* nb_kernel234 */
-    { "Coul(T)",                        42 }, /* nb_kernel300 */
-    { "Coul(T) [W3]",                  125 }, /* nb_kernel301 */
-    { "Coul(T) [W3-W3]",               369 }, /* nb_kernel302 */
-    { "Coul(T) [W4]",                  125 }, /* nb_kernel303 */
-    { "Coul(T) [W4-W4]",               369 }, /* nb_kernel304 */
-    { "Coul(T) + LJ",                   55 }, /* nb_kernel310 */
-    { "Coul(T) + LJ [W3]",             138 }, /* nb_kernel311 */
-    { "Coul(T) + LJ [W3-W3]",          382 }, /* nb_kernel312 */
-    { "Coul(T) + LJ [W4]",             158 }, /* nb_kernel313 */
-    { "Coul(T) + LJ [W4-W4]",          402 }, /* nb_kernel314 */
-    { "Coul(T) + Bham",                 81 }, /* nb_kernel320 */
-    { "Coul(T) + Bham [W3]",           164 }, /* nb_kernel321 */
-    { "Coul(T) + Bham [W3-W3]",        408 }, /* nb_kernel322 */
-    { "Coul(T) + Bham [W4]",           186 }, /* nb_kernel323 */
-    { "Coul(T) + Bham [W4-W4]",        430 }, /* nb_kernel324 */
-    { "Coul(T) + VdW(T)",               68 }, /* nb_kernel330 */
-    { "Coul(T) + VdW(T) [W3]",         151 }, /* nb_kernel331 */
-    { "Coul(T) + VdW(T) [W3-W3]",      395 }, /* nb_kernel332 */
-    { "Coul(T) + VdW(T) [W4]",         179 }, /* nb_kernel333 */
-    { "Coul(T) + VdW(T) [W4-W4]",      423 }, /* nb_kernel334 */
-    { "Generalized Born Coulomb",       48 }, /* nb_kernel400 */
-    { "GB Coulomb + LJ",                61 }, /* nb_kernel410 */
-    { "GB Coulomb + VdW(T)",            79 }, /* nb_kernel430 */
-    { "LJ NF",                          19 }, /* nb_kernel010nf */
-    { "Buckingham NF",                  48 }, /* nb_kernel020nf */ 
-    { "VdW(T) NF",                      33 }, /* nb_kernel030nf */
-    { "Coulomb NF",                     16 }, /* nb_kernel100nf */
-    { "Coulomb [W3] NF",                47 }, /* nb_kernel101nf */
-    { "Coulomb [W3-W3] NF",            135 }, /* nb_kernel102nf */
-    { "Coulomb [W4] NF",                47 }, /* nb_kernel103nf */
-    { "Coulomb [W4-W4] NF",            135 }, /* nb_kernel104nf */
-    { "Coulomb + LJ NF",                24 }, /* nb_kernel110nf */
-    { "Coulomb + LJ [W3] NF",           55 }, /* nb_kernel111nf */
-    { "Coulomb + LJ [W3-W3] NF",       143 }, /* nb_kernel112nf */
-    { "Coulomb + LJ [W4] NF",           66 }, /* nb_kernel113nf */
-    { "Coulomb + LJ [W4-W4] NF",       154 }, /* nb_kernel114nf */
-    { "Coulomb + Bham  NF",             51 }, /* nb_kernel120nf */
-    { "Coulomb + Bham [W3] NF",         82 }, /* nb_kernel121nf */
-    { "Coulomb + Bham [W3-W3] NF",     170 }, /* nb_kernel122nf */
-    { "Coulomb + Bham [W4] NF",         95 }, /* nb_kernel123nf */
-    { "Coulomb + Bham [W4-W4] NF",     183 }, /* nb_kernel124nf */
-    { "Coulomb + VdW(T)  NF",           36 }, /* nb_kernel130nf */
-    { "Coulomb + VdW(T) [W3] NF",       67 }, /* nb_kernel131nf */
-    { "Coulomb + VdW(T) [W3-W3] NF",   155 }, /* nb_kernel132nf */
-    { "Coulomb + VdW(T) [W4] NF",       80 }, /* nb_kernel133nf */
-    { "Coulomb + VdW(T) [W4-W4] NF",   168 }, /* nb_kernel134nf */
-    { "RF Coul NF",                     19 }, /* nb_kernel200nf */
-    { "RF Coul [W3] NF",                56 }, /* nb_kernel201nf */
-    { "RF Coul [W3-W3] NF",            162 }, /* nb_kernel202nf */
-    { "RF Coul [W4] NF",                56 }, /* nb_kernel203nf */
-    { "RF Coul [W4-W4] NF",            162 }, /* nb_kernel204nf */
-    { "RF Coul + LJ NF",                27 }, /* nb_kernel210nf */
-    { "RF Coul + LJ [W3] NF",           64 }, /* nb_kernel211nf */
-    { "RF Coul + LJ [W3-W3] NF",       170 }, /* nb_kernel212nf */
-    { "RF Coul + LJ [W4] NF",           75 }, /* nb_kernel213nf */
-    { "RF Coul + LJ [W4-W4] NF",       181 }, /* nb_kernel214nf */
-    { "RF Coul + Bham  NF",             54 }, /* nb_kernel220nf */
-    { "RF Coul + Bham [W3] NF",         91 }, /* nb_kernel221nf */
-    { "RF Coul + Bham [W3-W3] NF",     197 }, /* nb_kernel222nf */
-    { "RF Coul + Bham [W4] NF",        104 }, /* nb_kernel223nf */
-    { "RF Coul + Bham [W4-W4] NF",     210 }, /* nb_kernel224nf */
-    { "RF Coul + VdW(T)  NF",           39 }, /* nb_kernel230nf */
-    { "RF Coul + VdW(T) [W3] NF",       76 }, /* nb_kernel231nf */
-    { "RF Coul + VdW(T) [W3-W3] NF",   182 }, /* nb_kernel232nf */
-    { "RF Coul + VdW(T) [W4] NF",       89 }, /* nb_kernel233nf */
-    { "RF Coul + VdW(T) [W4-W4] NF",   195 }, /* nb_kernel234nf */
-    { "Coul(T) NF",                     26 }, /* nb_kernel300nf */
-    { "Coul(T) [W3] NF",                77 }, /* nb_kernel301nf */
-    { "Coul(T) [W3-W3] NF",            225 }, /* nb_kernel302nf */
-    { "Coul(T) [W4] NF",                77 }, /* nb_kernel303nf */
-    { "Coul(T) [W4-W4] NF",            225 }, /* nb_kernel304nf */
-    { "Coul(T) + LJ NF",                34 }, /* nb_kernel310nf */
-    { "Coul(T) + LJ [W3] NF",           85 }, /* nb_kernel311nf */
-    { "Coul(T) + LJ [W3-W3] NF",       233 }, /* nb_kernel312nf */
-    { "Coul(T) + LJ [W4] NF",           96 }, /* nb_kernel313nf */
-    { "Coul(T) + LJ [W4-W4] NF",       244 }, /* nb_kernel314nf */
-    { "Coul(T) + Bham NF",              61 }, /* nb_kernel320nf */
-    { "Coul(T) + Bham [W3] NF",        112 }, /* nb_kernel321nf */
-    { "Coul(T) + Bham [W3-W3] NF",     260 }, /* nb_kernel322nf */
-    { "Coul(T) + Bham [W4] NF",        125 }, /* nb_kernel323nf */
-    { "Coul(T) + Bham [W4-W4] NF",     273 }, /* nb_kernel324nf */
-    { "Coul(T) + VdW(T) NF",            42 }, /* nb_kernel330nf */
-    { "Coul(T) + VdW(T) [W3] NF",       93 }, /* nb_kernel331nf */
-    { "Coul(T) + VdW(T) [W3-W3] NF",   241 }, /* nb_kernel332nf */
-    { "Coul(T) + VdW(T) [W4] NF",      110 }, /* nb_kernel333nf */
-    { "Coul(T) + VdW(T) [W4-W4] NF",   258 }, /* nb_kernel334nf */
-    { "Generalized Born Coulomb NF",    29 }, /* nb_kernel400nf */
-    { "GB Coulomb + LJ NF",             37 }, /* nb_kernel410nf */
-    { "GB Coulomb + VdW(T) NF",         49 }, /* nb_kernel430nf */
-    { "Free energy innerloop",         150 }, /* free energy, estimate */  
-    { "All-vs-All, Coul + LJ",          38 },
-    { "All-vs-All, GB + LJ",            61 },
-    { "Outer nonbonded loop",           10 },
+    /* These are re-used for different NB kernels, since there are so many.
+     * The actual number of flops is set dynamically.
+     */
+    { "NB VdW [V&F]",                    1 },
+    { "NB VdW [F]",                      1 },
+    { "NB Elec. [V&F]",                  1 },
+    { "NB Elec. [F]",                    1 },
+    { "NB Elec. [W3,V&F]",               1 },
+    { "NB Elec. [W3,F]",                 1 },
+    { "NB Elec. [W3-W3,V&F]",            1 },
+    { "NB Elec. [W3-W3,F]",              1 },
+    { "NB Elec. [W4,V&F]",               1 },
+    { "NB Elec. [W4,F]",                 1 },
+    { "NB Elec. [W4-W4,V&F]",            1 },
+    { "NB Elec. [W4-W4,F]",              1 },
+    { "NB VdW & Elec. [V&F]",            1 },
+    { "NB VdW & Elec. [F]",              1 },
+    { "NB VdW & Elec. [W3,V&F]",         1 },
+    { "NB VdW & Elec. [W3,F]",           1 },
+    { "NB VdW & Elec. [W3-W3,V&F]",      1 },
+    { "NB VdW & Elec. [W3-W3,F]",        1 },
+    { "NB VdW & Elec. [W4,V&F]",         1 },
+    { "NB VdW & Elec. [W4,F]",           1 },
+    { "NB VdW & Elec. [W4-W4,V&F]",      1 },
+    { "NB VdW & Elec. [W4-W4,F]",        1 },
+    
+    { "NB Generic kernel",               1 },
+    { "NB Free energy kernel",           1 },
+    { "NB All-vs-all",                   1 },
+    { "NB All-vs-all, GB",               1 },
+
      { "Pair Search distance check",      9 }, /* nbnxn pair dist. check */
      /* nbnxn kernel flops are based on inner-loops without exclusion checks.
       * Plain Coulomb runs through the RF kernels, except with CUDA.
@@ -201,23 +100,23 @@ static const t_nrnb_data nbdata[eNRNB] = {
       * - GPU always does exclusions, which requires 2-4 flops, but as invsqrt
       *   is always counted as 6 flops, this roughly compensates.
       */
-    { "LJ + Coulomb RF (F)",            38 }, /* nbnxn kernel LJ+RF, no ener */
-    { "LJ + Coulomb RF (F+E)",          54 },
-    { "LJ + Coulomb tabulated (F)",     41 }, /* nbnxn kernel LJ+tab, no en */
-    { "LJ + Coulomb tabulated (F+E)",   59 },
-    { "LJ (F)",                         33 }, /* nbnxn kernel LJ, no ener */
-    { "LJ (F+E)",                       43 },
-    { "Coulomb RF (F)",                 31 }, /* nbnxn kernel RF, no ener */
-    { "Coulomb RF (F+E)",               36 },
-    { "Coulomb tabulated (F)",          34 }, /* nbnxn kernel tab, no ener */
-    { "Coulomb tabulated (F+E)",        41 },
+    { "NxN RF Elec. + VdW [F]",         38 }, /* nbnxn kernel LJ+RF, no ener */
+    { "NxN RF Elec. + VdW [V&F]",       54 },
+    { "NxN CSTab Elec. + VdW [F]",      41 }, /* nbnxn kernel LJ+tab, no en */
+    { "NxN CSTab Elec. + VdW [V&F]",    59 },
+    { "NxN VdW [F]",                    33 }, /* nbnxn kernel LJ, no ener */
+    { "NxN VdW [V&F]",                  43 },
+    { "NxN RF Electrostatics [F]",      31 }, /* nbnxn kernel RF, no ener */
+    { "NxN RF Electrostatics [V&F]",    36 },
+    { "NxN CSTab Elec. [F]",            34 }, /* nbnxn kernel tab, no ener */
+    { "NxN CSTab Elec. [V&F]",          41 },
      { "1,4 nonbonded interactions",     90 },
      { "Born radii (Still)",             47 },
      { "Born radii (HCT/OBC)",          183 },
      { "Born force chain rule",          15 },
-    { "All-vs-All Still radii",         47 },
-    { "All-vs-All HCT/OBC radii",      183 },
-    { "All-vs-All Born chain rule",     15 },
+    { "All-vs-All Still radii",          1 },
+    { "All-vs-All HCT/OBC radii",        1 },
+    { "All-vs-All Born chain rule",      1 },
      { "Calc Weights",                   36 },
      { "Spread Q",                        6 },
      { "Spread Q Bspline",                2 }, 
@@ -333,7 +232,7 @@ void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
    const char   *myline = "-----------------------------------------------------------------------------";
    
    *nbfs = 0.0;
-  for(i=0; (i<eNR_NBKERNEL_NR); i++) {
+  for(i=0; (i<eNR_NBKERNEL_ALLVSALLGB); i++) {
      if (strstr(nbdata[i].name,"W3-W3") != NULL)
        *nbfs += 9e-6*nrnb->n[i];
      else if (strstr(nbdata[i].name,"W3") != NULL)
@@ -357,14 +256,16 @@ void print_flop(FILE *out,t_nrnb *nrnb,double *nbfs,double *mflop)
      fprintf(out,"\n\tM E G A - F L O P S   A C C O U N T I N G\n\n");
    }
  
-  if (out) {
-    fprintf(out,"   RF=Reaction-Field  FE=Free Energy  SCFE=Soft-Core/Free Energy\n");
-    fprintf(out,"   T=Tabulated        W3=SPC/TIP3p    W4=TIP4p (single or pairs)\n");
-    fprintf(out,"   NF=No Forces\n\n");
-    
-    fprintf(out," %-32s %16s %15s  %7s\n",
-           "Computing:","M-Number","M-Flops","% Flops");
-    fprintf(out,"%s\n",myline);
+  if (out)
+  {
+      fprintf(out," NB=Group-cutoff nonbonded kernels    NxN=N-by-N tile Verlet kernels\n");
+      fprintf(out," RF=Reaction-Field  VdW=Van der Waals  CSTab=Cubic-spline table\n");
+      fprintf(out," W3=SPC/TIP3p  W4=TIP4p (single or pairs)\n");
+      fprintf(out," V&F=Potential and force  V=Potential only  F=Force only\n\n");
+
+      fprintf(out," %-32s %16s %15s  %7s\n",
+              "Computing:","M-Number","M-Flops","% Flops");
+      fprintf(out,"%s\n",myline);
    }
    *mflop=0.0;
    tfrac=0.0;
@@ -462,7 +363,7 @@ const char *nrnb_str(int enr)
  static const int    force_index[]={ 
    eNR_BONDS,  eNR_ANGLES,  eNR_PROPER, eNR_IMPROPER, 
    eNR_RB,     eNR_DISRES,  eNR_ORIRES, eNR_POSRES,
-  eNR_NS,     eNR_NBKERNEL_OUTER
+  eNR_NS,
  };
  #define NFORCE_INDEX asize(force_index)
  
@@ -513,7 +414,7 @@ void pr_load(FILE *log,t_commrec *cr,t_nrnb nrnb[])
    for(i=0; (i<cr->nnodes); i++) {
        add_nrnb(av,av,&(nrnb[i]));
        /* Cost due to forces */
-      for(j=0; (j<eNR_NBKERNEL_NR); j++)
+      for(j=0; (j<eNR_NBKERNEL_ALLVSALLGB); j++)
         ftot[i]+=nrnb[i].n[j]*cost_nrnb(j);
        for(j=0; (j<NFORCE_INDEX); j++) 
         ftot[i]+=nrnb[i].n[force_index[j]]*cost_nrnb(force_index[j]);
diff --git a/src/gmxlib/string2.c b/src/gmxlib/string2.c

index 9f84eb75d84e85ed50c4569c57557a209ae113af..5af6f04a9e74ff33a31e85f709a456e8a4f50d47 100644 (file)
--- a/src/gmxlib/string2.c
+++ b/src/gmxlib/string2.c
@@ -231,42 +231,53 @@ void nice_header (FILE *out,const char *fn)
    fprintf (out,"%c\n",COMMENTSIGN);
  }
  
+
  int gmx_strcasecmp_min(const char *str1, const char *str2)
  {
-  char ch1,ch2;
-  
-  do
+    char ch1,ch2;
+
+    do
      {
-      do
-       ch1=toupper(*(str1++));
-      while ((ch1=='-') || (ch1=='_'));
-      do 
-       ch2=toupper(*(str2++));
-      while ((ch2=='-') || (ch2=='_'));
-      if (ch1!=ch2) return (ch1-ch2);
+        do
+        {
+            ch1=toupper(*(str1++));
+        }
+        while ((ch1=='-') || (ch1=='_'));
+        do
+        {
+            ch2=toupper(*(str2++));
+        }
+        while ((ch2=='-') || (ch2=='_'));
+
+        if (ch1!=ch2) return (ch1-ch2);
      }
-  while (ch1);
-  return 0; 
+    while (ch1);
+    return 0;
  }
  
  int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
  {
-  char ch1,ch2;
-  char *stri1, *stri2;
+    char ch1,ch2;
+    char *stri1, *stri2;
  
-  stri1=(char *)str1;
-  stri2=(char *)str2;  
-  do
+    stri1=(char *)str1;
+    stri2=(char *)str2;
+    do
      {
-      do
-       ch1=toupper(*(str1++));
-      while ((ch1=='-') || (ch1=='_'));
-      do 
-       ch2=toupper(*(str2++));
-      while ((ch2=='-') || (ch2=='_'));
-      if (ch1!=ch2) return (ch1-ch2);
+        do
+        {
+            ch1=toupper(*(str1++));
+        }
+        while ((ch1=='-') || (ch1=='_'));
+        do
+        {
+            ch2=toupper(*(str2++));
+        }
+        while ((ch2=='-') || (ch2=='_'));
+
+        if (ch1!=ch2) return (ch1-ch2);
      }
-  while (ch1 && (str1-stri1<n) && (str2-stri2<n));
+    while (ch1 && (str1-stri1<n) && (str2-stri2<n));
    return 0; 
  }
  
@@ -329,6 +340,25 @@ gmx_strndup(const char *src, int n)
      return dest;
  }
  
+/* Magic hash init number for Dan J. Bernsteins algorithm.
+ * Do NOT use any other value unless you really know what you are doing.
+ */
+const unsigned int
+gmx_string_hash_init = 5381;
+
+
+unsigned int
+gmx_string_hash_func(const char *s, unsigned int hash_init)
+{
+    int c;
+
+    while ((c = toupper(*s++)) != '\0')
+    {
+        if(isalnum(c)) hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
+    }
+    return hash_init;
+}
+
  /*!
   * \param[in] pattern  Pattern to match against.
   * \param[in] str      String to match.
diff --git a/src/gmxlib/tpxio.c b/src/gmxlib/tpxio.c

index da659df491549c0dea599db5a482c2ba2d4668fe..62857d510ec66dc597e73486df7934374a030942 100644 (file)
--- a/src/gmxlib/tpxio.c
+++ b/src/gmxlib/tpxio.c
@@ -73,7 +73,7 @@
  static const char *tpx_tag = TPX_TAG_RELEASE;
  
  /* This number should be increased whenever the file format changes! */
-static const int tpx_version = 81;
+static const int tpx_version = 82;
  
  /* This number should only be increased when you edit the TOPOLOGY section
   * or the HEADER of the tpx format.
@@ -687,7 +687,16 @@ static void do_inputrec(t_fileio *fio, t_inputrec *ir,gmx_bool bRead,
      if (file_version >= 67) {
        gmx_fio_do_real(fio,ir->rlistlong);
      }
-    gmx_fio_do_int(fio,ir->coulombtype); 
+    if(file_version >= 82)
+    {
+        gmx_fio_do_int(fio,ir->nstcalclr);
+    }
+    else
+    {
+        /* Calculate at NS steps */
+        ir->nstcalclr = ir->nstlist;
+    }
+    gmx_fio_do_int(fio,ir->coulombtype);
      if (file_version < 32 && ir->coulombtype == eelRF)
        ir->coulombtype = eelRF_NEC;      
      if (file_version >= 81)
diff --git a/src/gmxlib/txtdump.c b/src/gmxlib/txtdump.c

index c5cf5bc32a393295713cc084c80399e73541052c..945a31a4d86c734251bb4f8669ae62438371c767 100644 (file)
--- a/src/gmxlib/txtdump.c
+++ b/src/gmxlib/txtdump.c
@@ -698,6 +698,7 @@ void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
      PR("verlet-buffer-drift",ir->verletbuf_drift);
      PR("rlist",ir->rlist);
      PR("rlistlong",ir->rlistlong);
+    PR("nstcalclr",ir->nstcalclr);
      PR("rtpi",ir->rtpi);
      PS("coulombtype",EELTYPE(ir->coulombtype));
      PS("coulomb-modifier",INTMODIFIER(ir->coulomb_modifier));
diff --git a/src/kernel/do_gct.c b/src/kernel/do_gct.c

index cf9c4a805ae3c7b3201fe98938a1f0aa393b67ed..5a8de3857ece158a1bc8ae2f319ed9b0fd825643 100644 (file)
--- a/src/kernel/do_gct.c
+++ b/src/kernel/do_gct.c
@@ -309,8 +309,9 @@ static void upd_nbfplj(FILE *log,real *nbfp,int atnr,real f6[],real f12[],
         else
           sig  = sqrt(sigma[n]*sigma[m]);
         sig6 = pow(sig,6.0);
-       C6 (nbfp,atnr,n,m) = 4*eps*sig6; 
-       C12(nbfp,atnr,n,m) = 4*eps*sig6*sig6;
+    /* nbfp now includes the 6.0/12.0 derivative prefactors */
+       C6 (nbfp,atnr,n,m) = 4*eps*sig6/6.0;
+       C12(nbfp,atnr,n,m) = 4*eps*sig6*sig6/12.0;
        }
      }
      sfree(sigma);
@@ -677,9 +678,12 @@ void do_coupling(FILE *log,const output_env_t oenv,int nfile,
        ati = tclj->at_i;
        atj = tclj->at_j;
        if (atj == -1) 
-       atj = ati;
-      tclj->c6  =  C6(fr->nbfp,fr->ntype,ati,atj);
-      tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj);
+      {
+          atj = ati;
+      }
+      /* nbfp now includes the 6.0/12.0 derivative prefactors */
+      tclj->c6  =  C6(fr->nbfp,fr->ntype,ati,atj)/6.0;
+      tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj)/12.0;
      }
    }
    else {
@@ -707,10 +711,13 @@ void do_coupling(FILE *log,const output_env_t oenv,int nfile,
        ati = tcbu->at_i;
        atj = tcbu->at_j;
        if (atj == -1) 
-       atj = ati;
+      {
+          atj = ati;
+      }
+      /* nbfp now includes the 6.0 derivative prefactors */
        tcbu->a = BHAMA(fr->nbfp,fr->ntype,ati,atj);
        tcbu->b = BHAMB(fr->nbfp,fr->ntype,ati,atj);
-      tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj);
+      tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj)/6.0;
        if (debug)
         fprintf(debug,"buck (type=%d) = %e, %e, %e\n",
                 tcbu->at_i,tcbu->a,tcbu->b,tcbu->c);
diff --git a/src/kernel/gctio.c b/src/kernel/gctio.c

index 27d46ad6607d8e4ffa2d20f1a4b802be839debfc..fca07bf6fb675c60519642d9dd21f16e034a39c0 100644 (file)
--- a/src/kernel/gctio.c
+++ b/src/kernel/gctio.c
@@ -158,8 +158,9 @@ void copy_ff(t_coupl_rec *tcr,t_forcerec *fr,t_mdatoms *md,t_idef *idef)
      atj  = tclj->at_j;
      if (atj == -1)
        atj = ati;
-    tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj);
-    tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj);
+    /* nbfp now includes the 6.0/12.0 derivative prefactors */
+    tclj->c6  = C6(fr->nbfp,fr->ntype,ati,atj)/6.0;
+    tclj->c12 = C12(fr->nbfp,fr->ntype,ati,atj)/12.0;
    }
    
    for(i=0; (i<tcr->nBU); i++) {
@@ -169,9 +170,10 @@ void copy_ff(t_coupl_rec *tcr,t_forcerec *fr,t_mdatoms *md,t_idef *idef)
      atj  = tcbu->at_j;
      if (atj == -1)
        atj = ati;
+    /* nbfp now includes the 6.0 derivative prefactor */
      tcbu->a = BHAMA(fr->nbfp,fr->ntype,ati,atj);
      tcbu->b = BHAMB(fr->nbfp,fr->ntype,ati,atj);
-    tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj);
+    tcbu->c = BHAMC(fr->nbfp,fr->ntype,ati,atj)/6.0;
    }
    
    for(i=0; (i<tcr->nQ); i++) {
diff --git a/src/kernel/md.c b/src/kernel/md.c

index 59613cc238d4b3a36f7336348c94de52fd22f785..7e17b8da3203cfae775503f2fd7a9428d917bbb3 100644 (file)
--- a/src/kernel/md.c
+++ b/src/kernel/md.c
@@ -202,6 +202,7 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
      gmx_bool        bAppend;
      gmx_bool        bResetCountersHalfMaxH=FALSE;
      gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
+    gmx_bool        bUpdateDoLR;
      real        mu_aver=0,dvdl;
      int         a0,a1,gnx=0,ii;
      atom_id     *grpindex=NULL;
@@ -236,12 +237,8 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
  
      if(MASTER(cr))
      {
-        fprintf(stderr,
-                "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
-                "We have just committed the new CPU detection code in this branch,\n"
-                "and will commit new SSE/AVX kernels in a few days. However, this\n"
-                "means that currently only the NxN kernels are accelerated!\n"
-                "In the mean time, you might want to avoid production runs in 4.6.\n\n");
+        gmx_warning("New C kernels (and force-only) kernels are now enabled,\n"
+                    "but it will be another couple of days for SSE/AVX kernels.\n\n");
      }
  
  #ifdef GMX_FAHCORE
@@ -522,8 +519,7 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
      /* PME tuning is only supported with GPUs or PME nodes and not with rerun */
      if ((Flags & MD_TUNEPME) &&
          EEL_PME(fr->eeltype) &&
-        fr->cutoff_scheme == ecutsVERLET &&
-        (fr->nbv->bUseGPU || !(cr->duty & DUTY_PME)) &&
+        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
          !bRerunMD)
      {
          pme_loadbal_init(&pme_loadbal,ir,state->box,fr->ic,fr->pmedata);
@@ -1106,12 +1102,19 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
          force_flags = (GMX_FORCE_STATECHANGED |
                         ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
                         GMX_FORCE_ALLFORCES |
-                       (bNStList ? GMX_FORCE_DOLR : 0) |
                         GMX_FORCE_SEPLRF |
                         (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
                         (bCalcEner ? GMX_FORCE_ENERGY : 0) |
                         (bDoFEP ? GMX_FORCE_DHDL : 0)
              );
+
+        if(fr->bTwinRange)
+        {
+            if(do_per_step(step,ir->nstcalclr))
+            {
+                force_flags |= GMX_FORCE_DO_LR;
+            }
+        }
          
          if (shellfc)
          {
@@ -1179,8 +1182,16 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                  trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
              }
  
+            /* If we are using twin-range interactions where the long-range component
+             * is only evaluated every nstcalclr>1 steps, we should do a special update
+             * step to combine the long-range forces on these steps.
+             * For nstcalclr=1 this is not done, since the forces would have been added
+             * directly to the short-range forces already.
+             */
+            bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+            
              update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,
-                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                          f,bUpdateDoLR,fr->f_twin,fcd,
                            ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
                            cr,nrnb,constr,&top->idef);
              
@@ -1650,9 +1661,11 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
  
                  if (bVV)
                  {
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                      /* velocity half-step update */
                      update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                  bUpdateDoLR,fr->f_twin,fcd,
                                    ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
                                    cr,nrnb,constr,&top->idef);
                  }
@@ -1666,9 +1679,10 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                  {
                      copy_rvecn(state->x,cbuf,0,state->natoms);
                  }
-                
+                bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                  update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                              fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                              bUpdateDoLR,fr->f_twin,fcd,
                                ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
                  wallcycle_stop(wcycle,ewcUPDATE);
  
@@ -1693,8 +1707,10 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                      /* now we know the scaling, we can compute the positions again again */
                      copy_rvecn(cbuf,state->x,0,state->natoms);
  
+                    bUpdateDoLR = (fr->bTwinRange && do_per_step(step,ir->nstcalclr));
+
                      update_coords(fplog,step,ir,mdatoms,state,fr->bMolPBC,f,
-                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                  bUpdateDoLR,fr->f_twin,fcd,
                                    ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
                      wallcycle_stop(wcycle,ewcUPDATE);
  
@@ -2067,13 +2083,17 @@ double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
                                           fr->ic,fr->nbv,&fr->pmedata,
                                           step);
  
+                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
                      fr->ewaldcoeff = fr->ic->ewaldcoeff;
+                    fr->rlist      = fr->ic->rlist;
+                    fr->rlistlong  = fr->ic->rlistlong;
+                    fr->rcoulomb   = fr->ic->rcoulomb;
+                    fr->rvdw       = fr->ic->rvdw;
                  }
-
                  cycles_pmes = 0;
              }
          }
-        
+
          if (step_rel == wcycle_get_reset_counters(wcycle) ||
              gs.set[eglsRESETCOUNTERS] != 0)
          {
diff --git a/src/kernel/openmm_wrapper.cpp b/src/kernel/openmm_wrapper.cpp

index 297bbc1a0b0ac4da8c0179fe48fd900d94f58e78..07f0ecaccee38b501aca88e200d92dde9ed10cda 100644 (file)
--- a/src/kernel/openmm_wrapper.cpp
+++ b/src/kernel/openmm_wrapper.cpp
@@ -669,23 +669,24 @@ static void checkGmxOptions(FILE* fplog, GmxOpenMMPlatformOptions *opt,
      for (i = 0; i < natoms; i++)
      {
          /* i-i */
-        c12 = C12(nbfp, natoms, i, i);
-        c6  = C6(nbfp,  natoms, i, i);
+        /* nbfp now includes the 6.0/12.0 prefactors to save flops in kernels */
+        c12 = C12(nbfp, natoms, i, i)/12.0;
+        c6  = C6(nbfp,  natoms, i, i)/6.0;
          convert_c_12_6(c12, c6, &sigma_ii, &eps_ii);
  
          for (j = 0; j < i; j++)
          {
              /* i-j */
-            c12 = C12(nbfp, natoms, i, j);
-            c6  = C6(nbfp,  natoms, i, j);
+            c12 = C12(nbfp, natoms, i, j)/12.0;
+            c6  = C6(nbfp,  natoms, i, j)/6.0;
              convert_c_12_6(c12, c6, &sigma_ij, &eps_ij);
              /* j-i */
-            c12 = C12(nbfp, natoms, j, i);
-            c6  = C6(nbfp,  natoms, j, i);
+            c12 = C12(nbfp, natoms, j, i)/12.0;
+            c6  = C6(nbfp,  natoms, j, i)/6.0;
              convert_c_12_6(c12, c6, &sigma_ji, &eps_ji);
              /* j-j */
-            c12 = C12(nbfp, natoms, j, j);
-            c6  = C6(nbfp,  natoms, j, j);
+            c12 = C12(nbfp, natoms, j, j)/12.0;
+            c6  = C6(nbfp,  natoms, j, j)/6.0;
              convert_c_12_6(c12, c6, &sigma_jj, &eps_jj);
              /* OpenMM hardcoded combination rules */
              sigma_comb = COMBRULE_SIGMA(sigma_ii, sigma_jj);
@@ -1107,8 +1108,9 @@ void* openmm_init(FILE *fplog, const char *platformOptStr,
  
          for (int i = 0; i < numAtoms; ++i)
          {
-            double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1];
-            double c6 = nbfp[types[i]*2*ntypes+types[i]*2];
+            /* nbfp now includes the 6.0/12.0 derivative prefactors to save flops in kernels*/
+            double c12 = nbfp[types[i]*2*ntypes+types[i]*2+1]/12.0;
+            double c6 = nbfp[types[i]*2*ntypes+types[i]*2]/6.0;
              double sigma=0.0, epsilon=0.0;
              convert_c_12_6(c12, c6, &sigma, &epsilon);
              nonbondedForce->addParticle(charges[i], sigma, epsilon);
diff --git a/src/kernel/pme_loadbal.c b/src/kernel/pme_loadbal.c

index 13e992f993448887485eed33bdec032ec8d6ace0..afe17d0fc70741c735b51e7fbcaec5b743a2e173 100644 (file)
--- a/src/kernel/pme_loadbal.c
+++ b/src/kernel/pme_loadbal.c
@@ -49,8 +49,10 @@
  
  /* Parameters and setting for one PP-PME setup */
  typedef struct {
-    real rcut;            /* Coulomb cut-off                              */
+    real rcut_coulomb;    /* Coulomb cut-off                              */
      real rlist;           /* pair-list cut-off                            */
+    real rlistlong;       /* LR pair-list cut-off                         */
+    int  nstcalclr;       /* frequency of evaluating long-range forces for group scheme */
      real spacing;         /* (largest) PME grid spacing                   */
      ivec grid;            /* the PME grid dimensions                      */
      real grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
@@ -83,7 +85,11 @@ struct pme_load_balancing {
      int  nstage;        /* the current maximum number of stages */
  
      real cut_spacing;   /* the minimum cutoff / PME grid spacing ratio */
-    real rbuf;          /* the pairlist buffer size */
+    real rcut_vdw;      /* Vdw cutoff (does not change) */
+    real rcut_coulomb_start; /* Initial electrostatics cutoff */
+    int  nstcalclr_start; /* Initial electrostatics cutoff */
+    real rbuf_coulomb;  /* the pairlist buffer size */
+    real rbuf_vdw;      /* the pairlist buffer size */
      matrix box_start;   /* the initial simulation box */
      int n;              /* the count of setup as well as the allocation size */
      pme_setup_t *setup; /* the PME+cutoff setups */
@@ -92,6 +98,7 @@ struct pme_load_balancing {
      int start;          /* start of setup range to consider in stage>0 */
      int end;            /* end   of setup range to consider in stage>0 */
      int elimited;       /* was the balancing limited, uses enum above */
+    int cutoff_scheme;  /* Verlet or group cut-offs */
  
      int stage;          /* the current stage */
  };
@@ -110,7 +117,32 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
      /* Any number of stages >= 2 is supported */
      pme_lb->nstage   = 2;
  
-    pme_lb->rbuf = ic->rlist - ic->rcoulomb;
+    pme_lb->cutoff_scheme = ir->cutoff_scheme;
+
+    if(pme_lb->cutoff_scheme == ecutsVERLET)
+    {
+        pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
+        pme_lb->rbuf_vdw     = pme_lb->rbuf_coulomb;
+    }
+    else
+    {
+        if(ic->rcoulomb > ic->rlist)
+        {
+            pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
+        }
+        else
+        {
+            pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
+        }
+        if(ic->rvdw > ic->rlist)
+        {
+            pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
+        }
+        else
+        {
+            pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
+        }
+    }
  
      copy_mat(box,pme_lb->box_start);
      if (ir->ePBC==epbcXY && ir->nwall==2)
@@ -121,13 +153,19 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
      pme_lb->n = 1;
      snew(pme_lb->setup,pme_lb->n);
  
+    pme_lb->rcut_vdw              = ic->rvdw;
+    pme_lb->rcut_coulomb_start    = ir->rcoulomb;
+    pme_lb->nstcalclr_start       = ir->nstcalclr;
+    
      pme_lb->cur = 0;
-    pme_lb->setup[0].rcut       = ic->rcoulomb;
-    pme_lb->setup[0].rlist      = ic->rlist;
-    pme_lb->setup[0].grid[XX]   = ir->nkx;
-    pme_lb->setup[0].grid[YY]   = ir->nky;
-    pme_lb->setup[0].grid[ZZ]   = ir->nkz;
-    pme_lb->setup[0].ewaldcoeff = ic->ewaldcoeff;
+    pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
+    pme_lb->setup[0].rlist        = ic->rlist;
+    pme_lb->setup[0].rlistlong    = ic->rlistlong;
+    pme_lb->setup[0].nstcalclr    = ir->nstcalclr;
+    pme_lb->setup[0].grid[XX]     = ir->nkx;
+    pme_lb->setup[0].grid[YY]     = ir->nky;
+    pme_lb->setup[0].grid[ZZ]     = ir->nkz;
+    pme_lb->setup[0].ewaldcoeff   = ic->ewaldcoeff;
  
      pme_lb->setup[0].pmedata  = pmedata;
      
@@ -166,6 +204,7 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
  {
      pme_setup_t *set;
      real fac,sp;
+    real tmpr_coulomb,tmpr_vdw;
      int d;
  
      /* Try to add a new setup with next larger cut-off to the list */
@@ -200,9 +239,55 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
      }
      while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
  
-    set->rcut    = pme_lb->cut_spacing*sp;
-    set->rlist   = set->rcut + pme_lb->rbuf;
-    set->spacing = sp;
+    set->rcut_coulomb = pme_lb->cut_spacing*sp;
+
+    if(pme_lb->cutoff_scheme == ecutsVERLET)
+    {
+        set->rlist        = set->rcut_coulomb + pme_lb->rbuf_coulomb;
+        /* We dont use LR lists with Verlet, but this avoids if-statements in further checks */
+        set->rlistlong    = set->rlist;
+    }
+    else
+    {
+        tmpr_coulomb          = set->rcut_coulomb + pme_lb->rbuf_coulomb;
+        tmpr_vdw              = pme_lb->rcut_vdw + pme_lb->rbuf_vdw;
+        set->rlist            = min(tmpr_coulomb,tmpr_vdw);
+        set->rlistlong        = max(tmpr_coulomb,tmpr_vdw);
+        
+        /* Set the long-range update frequency */
+        if(set->rlist == set->rlistlong)
+        {
+            /* No long-range interactions if the short-/long-range cutoffs are identical */
+            set->nstcalclr = 0;
+        }
+        else if(pme_lb->nstcalclr_start==0 || pme_lb->nstcalclr_start==1)
+        {
+            /* We were not doing long-range before, but now we are since rlist!=rlistlong */
+            set->nstcalclr = 1;
+        }
+        else
+        {
+            /* We were already doing long-range interactions from the start */
+            if(pme_lb->rcut_vdw > pme_lb->rcut_coulomb_start)
+            {
+                /* We were originally doing long-range VdW-only interactions.
+                 * If rvdw is still longer than rcoulomb we keep the original nstcalclr,
+                 * but if the coulomb cutoff has become longer we should update the long-range
+                 * part every step.
+                 */
+                set->nstcalclr = (tmpr_vdw > tmpr_coulomb) ? pme_lb->nstcalclr_start : 1;
+            }
+            else
+            {
+                /* We were not doing any long-range interaction from the start,
+                 * since it is not possible to do twin-range coulomb for the PME interaction.
+                 */
+                set->nstcalclr = 1;
+            }
+        }
+    }
+    
+    set->spacing      = sp;
      /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
      set->grid_efficiency = 1;
      for(d=0; d<DIM; d++)
@@ -211,17 +296,16 @@ static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
      }
      /* The Ewald coefficient is inversly proportional to the cut-off */
      set->ewaldcoeff =
-        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut/set->rcut;
+        pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut_coulomb/set->rcut_coulomb;
  
      set->count   = 0;
      set->cycles  = 0;
  
      if (debug)
      {
-        fprintf(debug,"PME loadbal: grid %d %d %d, cutoff %f\n",
-                set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut);
+        fprintf(debug,"PME loadbal: grid %d %d %d, coulomb cutoff %f\n",
+                set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb);
      }
-
      return TRUE;
  }
  
@@ -232,7 +316,7 @@ static void print_grid(FILE *fp_err,FILE *fp_log,
                         double cycles)
  {
      char buf[STRLEN],buft[STRLEN];
-    
+
      if (cycles >= 0)
      {
          sprintf(buft,": %.1f M-cycles",cycles*1e-6);
@@ -241,9 +325,9 @@ static void print_grid(FILE *fp_err,FILE *fp_log,
      {
          buft[0] = '\0';
      }
-    sprintf(buf,"%-11s%10s pme grid %d %d %d, cutoff %.3f%s",
+    sprintf(buf,"%-11s%10s pme grid %d %d %d, coulomb cutoff %.3f%s",
              pre,
-            desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut,
+            desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut_coulomb,
              buft);
      if (fp_err != NULL)
      {
@@ -274,10 +358,10 @@ static void print_loadbal_limited(FILE *fp_err,FILE *fp_log,
  {
      char buf[STRLEN],sbuf[22];
  
-    sprintf(buf,"step %4s: the %s limited the PME load balancing to a cut-off of %.3f",
+    sprintf(buf,"step %4s: the %s limited the PME load balancing to a coulomb cut-off of %.3f",
              gmx_step_str(step,sbuf),
              pmelblim_str[pme_lb->elimited],
-            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut);
+            pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut_coulomb);
      if (fp_err != NULL)
      {
          fprintf(fp_err,"\r%s\n",buf);
@@ -335,6 +419,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
      pme_setup_t *set;
      double cycles_fast;
      char buf[STRLEN],sbuf[22];
+    real rtab;
  
      if (pme_lb->stage == pme_lb->nstage)
      {
@@ -348,8 +433,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
      }
  
      set = &pme_lb->setup[pme_lb->cur];
-
      set->count++;
+
+    rtab = ir->rlistlong + ir->tabext;
+
      if (set->count % 2 == 1)
      {
          /* Skip the first cycle, because the first step after a switch
@@ -423,10 +510,10 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                  /* Find the next setup */
                  OK = pme_loadbal_increase_cutoff(pme_lb,ir->pme_order);
              }
-                
+
              if (OK && ir->ePBC != epbcNONE)
              {
-                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlist)
+                OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
                        <= max_cutoff2(ir->ePBC,state->box));
                  if (!OK)
                  {
@@ -441,7 +528,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
                  if (DOMAINDECOMP(cr))
                  {
                      OK = change_dd_cutoff(cr,state,ir,
-                                          pme_lb->setup[pme_lb->cur].rlist);
+                                          pme_lb->setup[pme_lb->cur].rlistlong);
                      if (!OK)
                      {
                          /* Failed: do not use this setup */
@@ -506,7 +593,7 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
  
      if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
      {
-        OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlist);
+        OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlistlong);
          if (!OK)
          {
              /* Failsafe solution */
@@ -527,22 +614,28 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
  
      set = &pme_lb->setup[pme_lb->cur];
  
-    ic->rcoulomb   = set->rcut;
+    ic->rcoulomb   = set->rcut_coulomb;
      ic->rlist      = set->rlist;
+    ic->rlistlong  = set->rlistlong;
+    ir->nstcalclr  = set->nstcalclr;
      ic->ewaldcoeff = set->ewaldcoeff;
  
-    if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
+    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
      {
          nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv,ic);
      }
      else
      {
-        init_interaction_const_tables(NULL,ic,nbv->grp[0].kernel_type);
+        init_interaction_const_tables(NULL,ic,ir->cutoff_scheme,
+                                      (ir->cutoff_scheme == ecutsGROUP) ? -1 : nbv->grp[0].kernel_type,
+                                      rtab);
      }
  
-    if (nbv->ngrp > 1)
+    if (pme_lb->cutoff_scheme == ecutsVERLET && nbv->ngrp > 1)
      {
-        init_interaction_const_tables(NULL,ic,nbv->grp[1].kernel_type);
+        init_interaction_const_tables(NULL,ic,ir->cutoff_scheme,
+                                      (ir->cutoff_scheme == ecutsGROUP) ? -1 : nbv->grp[1].kernel_type,
+                                      rtab);
      }
  
      if (cr->duty & DUTY_PME)
@@ -594,7 +687,7 @@ static void print_pme_loadbal_setting(FILE *fplog,
      fprintf(fplog,
              "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
              name,
-            setup->rcut,setup->rlist,
+            setup->rcut_coulomb,setup->rlist,
              setup->grid[XX],setup->grid[YY],setup->grid[ZZ],
              setup->spacing,1/setup->ewaldcoeff);
  }
@@ -604,7 +697,7 @@ static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
  {
      double pp_ratio,grid_ratio;
  
-    pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlist,3.0);
+    pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlistlong,3.0);
      grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
          (double)pme_grid_points(&pme_lb->setup[0]);
  
diff --git a/src/kernel/readir.c b/src/kernel/readir.c

index 6f0ac29fc08d5e13871863e0c753c94119562853..edaf0d31605cd030b9566db1f3348be099f3f932 100644 (file)
--- a/src/kernel/readir.c
+++ b/src/kernel/readir.c
@@ -241,12 +241,6 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
  
      if (ir->cutoff_scheme == ecutsGROUP)
      {
-        if (ir->coulomb_modifier != eintmodNONE ||
-            ir->vdw_modifier != eintmodNONE)
-        {
-            warning_error(wi,"potential modifiers are not supported (yet) with the group cut-off scheme");
-        }
-
          /* BASIC CUT-OFF STUFF */
          if (ir->rlist == 0 ||
              !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
@@ -276,7 +270,16 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
              warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
          }
      }
-
+    
+    if(ir->rlistlong == ir->rlist)
+    {
+        ir->nstcalclr = 0;
+    }
+    else if(ir->rlistlong>ir->rlist && ir->nstcalclr==0)
+    {
+        warning_error(wi,"With different cutoffs for electrostatics and VdW, nstcalclr must be -1 or a positive number");
+    }
+    
      if (ir->cutoff_scheme == ecutsVERLET)
      {
          real rc_max;
@@ -370,6 +373,28 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
          ir->rlistlong = ir->rlist;
      }
  
+    if(ir->nstcalclr==-1)
+    {
+        /* if rlist=rlistlong, this will later be changed to nstcalclr=0 */
+        ir->nstcalclr = ir->nstlist;
+    }
+    else if(ir->nstcalclr>0)
+    {
+        if(ir->nstlist>0 && (ir->nstlist % ir->nstcalclr != 0))
+        {
+            warning_error(wi,"nstlist must be evenly divisible by nstcalclr. Use nstcalclr = -1 to automatically follow nstlist");
+        }
+    }
+    else if(ir->nstcalclr<-1)
+    {
+        warning_error(wi,"nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
+    }
+    
+    if(EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr>1)
+    {
+        warning_error(wi,"When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
+    }
+       
      /* GENERAL INTEGRATOR STUFF */
      if (!(ir->eI == eiMD || EI_VV(ir->eI)))
      {
@@ -878,7 +903,8 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
    /* More checks are in triple check (grompp.c) */
  
    if (ir->coulombtype == eelSWITCH) {
-    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
+    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious "
+            "artifacts, advice: use coulombtype = %s",
             eel_names[ir->coulombtype],
             eel_names[eelRF_ZERO]);
      warning(wi,warn_buf);
@@ -890,7 +916,7 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
    }
  
    if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
-    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
+    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, proceeding assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
      warning(wi,warn_buf);
      ir->epsilon_rf = ir->epsilon_r;
      ir->epsilon_r  = 1.0;
@@ -905,9 +931,10 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
      /* reaction field (at the cut-off) */
      
      if (ir->coulombtype == eelRF_ZERO) {
-       sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
+       sprintf(warn_buf,"With coulombtype = %s, epsilon-rf must be 0, assuming you meant epsilon_rf=0",
                eel_names[ir->coulombtype]);
-      CHECK(ir->epsilon_rf != 0);
+        CHECK(ir->epsilon_rf != 0);
+        ir->epsilon_rf = 0.0;
      }
  
      sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
@@ -926,37 +953,47 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
    if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
      if (EEL_SWITCHED(ir->coulombtype)) {
        sprintf(err_buf,
-             "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
+             "With coulombtype = %s rcoulomb_switch must be < rcoulomb. Or, better: Use the potential modifier options!",
               eel_names[ir->coulombtype]);
        CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
      }
    } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
-      if (ir->cutoff_scheme == ecutsGROUP) {
-          sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
+      if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE) {
+          sprintf(err_buf,"With coulombtype = %s, rcoulomb should be >= rlist unless you use a potential modifier",
                    eel_names[ir->coulombtype]);
            CHECK(ir->rlist > ir->rcoulomb);
        }
    }
  
-  if (EEL_FULL(ir->coulombtype)) {
-    if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
-        ir->coulombtype==eelPMEUSERSWITCH) {
-      sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
-             eel_names[ir->coulombtype]);
-      CHECK(ir->rcoulomb > ir->rlist);
-    } else if (ir->cutoff_scheme == ecutsGROUP) {
-      if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) {
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist\n"
-               "If you want optimal energy conservation or exact integration use %s",
-               eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
-      } else { 
-       sprintf(err_buf,
-               "With coulombtype = %s, rcoulomb must be equal to rlist",
-               eel_names[ir->coulombtype]);
+  if(ir->coulombtype==eelSWITCH || ir->coulombtype==eelSHIFT ||
+     ir->vdwtype==evdwSWITCH || ir->vdwtype==evdwSHIFT)
+  {
+      sprintf(warn_buf,
+              "The switch/shift interaction settings are just for compatibility; you will get better"
+              "performance from applying potential modifiers to your interactions!\n");
+      warning_note(wi,warn_buf);
+  }
+
+  if (EEL_FULL(ir->coulombtype))
+  {
+      if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
+          ir->coulombtype==eelPMEUSERSWITCH)
+      {
+          sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
+                  eel_names[ir->coulombtype]);
+          CHECK(ir->rcoulomb > ir->rlist);
+      }
+      else if (ir->cutoff_scheme == ecutsGROUP && ir->coulomb_modifier == eintmodNONE)
+      {
+          if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD)
+          {
+              sprintf(err_buf,
+                      "With coulombtype = %s (without modifier), rcoulomb must be equal to rlist.\n"
+                      "If you want optimal energy conservation, consider using a potential modifier.",
+                      eel_names[ir->coulombtype]);
+              CHECK(ir->rcoulomb != ir->rlist);
+          }
        }
-      CHECK(ir->rcoulomb != ir->rlist);
-    }
    }
  
    if (EEL_PME(ir->coulombtype)) {
@@ -977,12 +1014,12 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
    }
  
    if (EVDW_SWITCHED(ir->vdwtype)) {
-    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
+    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw. Or, better - use a potential modifier.",
             evdw_names[ir->vdwtype]);
      CHECK(ir->rvdw_switch >= ir->rvdw);
    } else if (ir->vdwtype == evdwCUT) {
-      if (ir->cutoff_scheme == ecutsGROUP) {
-          sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
+      if (ir->cutoff_scheme == ecutsGROUP && ir->vdw_modifier == eintmodNONE) {
+          sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist unless you use a potential modifier",evdw_names[ir->vdwtype]);
            CHECK(ir->rlist > ir->rvdw);
        }
    }
@@ -1008,13 +1045,6 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
    }
  
    if (ir->nstlist == -1) {
-    sprintf(err_buf,
-           "nstlist=-1 only works with switched or shifted potentials,\n"
-           "suggestion: use vdw-type=%s and coulomb-type=%s",
-           evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
-    CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
-            EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
-
      sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
      CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
    }
@@ -1033,13 +1063,13 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
      /* ENERGY CONSERVATION */
      if (ir_NVE(ir) && ir->cutoff_scheme == ecutsGROUP)
      {
-        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
+        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0 && ir->vdw_modifier == eintmodNONE)
          {
              sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
                      evdw_names[evdwSHIFT]);
              warning_note(wi,warn_buf);
          }
-        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
+        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0 && ir->coulomb_modifier == eintmodNONE)
          {
              sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
                      eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
@@ -1113,6 +1143,7 @@ void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
  
      if (ir->bAdress)
      {
+        warning_error(wi,"AdResS is currently disabled\n");
          if (ir->cutoff_scheme != ecutsGROUP)
          {
              warning_error(wi,"AdresS simulation supports only cutoff-scheme=group");
@@ -1585,6 +1616,7 @@ void get_ir(const char *mdparin,const char *mdparout,
    RTYPE ("rlist",      ir->rlist,      -1);
    CTYPE ("long-range cut-off for switched potentials");
    RTYPE ("rlistlong",  ir->rlistlong,  -1);
+  ITYPE ("nstcalclr",  ir->nstcalclr,  -1);
  
    /* Electrostatics */
    CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
@@ -2623,7 +2655,7 @@ void do_index(const char* mdparin, const char *ndx,
  
            if (ir->etc != etcVRESCALE && ir->opts.tau_t[i] == 0)
            {
-              warning_note(wi,"tau-t = -1 is the new value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
+              warning_note(wi,"tau-t = -1 is the value to signal that a group should not have temperature coupling. Treating your use of tau-t = 0 as if you used -1.");
            }
  
            if (ir->opts.tau_t[i] >= 0)
diff --git a/src/kernel/runner.c b/src/kernel/runner.c

index e4d2403dc99873ac6187f6101d3dcc18f394daf8..d8f8a383f57a41053695bff1c9ae5f43ed3bbbfb 100644 (file)
--- a/src/kernel/runner.c
+++ b/src/kernel/runner.c
@@ -1140,7 +1140,7 @@ static void override_nsteps_cmdline(FILE *fplog,
   * before the other nodes have read the tpx file and called gmx_detect_hardware.
   */
  typedef struct {
-    int      cutoff_scheme; /* The cutoff-scheme from inputrec_t */
+    int cutoff_scheme; /* The cutoff scheme from inputrec_t */
      gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
  } master_inf_t;
  
diff --git a/src/kernel/tpbcmp.c b/src/kernel/tpbcmp.c

index 4d6fca8cb757256c9bfe4a5af5d2f6d5da2cd764..0cb55679d79db24e2a91da5e15dec35d19661426 100644 (file)
--- a/src/kernel/tpbcmp.c
+++ b/src/kernel/tpbcmp.c
@@ -632,6 +632,7 @@ static void cmp_inputrec(FILE *fp,t_inputrec *ir1,t_inputrec *ir2,real ftol, rea
    cmp_real(fp,"inputrec->verletbuf_drift",-1,ir1->verletbuf_drift,ir2->verletbuf_drift,ftol,abstol);
    cmp_real(fp,"inputrec->rlist",-1,ir1->rlist,ir2->rlist,ftol,abstol);
    cmp_real(fp,"inputrec->rlistlong",-1,ir1->rlistlong,ir2->rlistlong,ftol,abstol);
+  cmp_int(fp,"inputrec->nstcalclr",-1,ir1->nstcalclr,ir2->nstcalclr);
    cmp_real(fp,"inputrec->rtpi",-1,ir1->rtpi,ir2->rtpi,ftol,abstol);
    cmp_int(fp,"inputrec->coulombtype",-1,ir1->coulombtype,ir2->coulombtype);
    cmp_int(fp,"inputrec->coulomb_modifier",-1,ir1->coulomb_modifier,ir2->coulomb_modifier);
diff --git a/src/mdlib/adress.c b/src/mdlib/adress.c

index 4c48f83dad462d3fd57e7a44e67f8a16f30cc25a..b5c90324548d4a83ecb47be4a1dd04992bd84e01 100644 (file)
--- a/src/mdlib/adress.c
+++ b/src/mdlib/adress.c
@@ -543,12 +543,12 @@ adress_thermo_force(int                  start,
                  {
                      if (fr->n_adress_tf_grps > 0 ){
                          /* multi component tf is on, select the right table */
-                        ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].tab;
+                        ATFtab = fr->atf_tabs[mdatoms->tf_table_index[iatom]].data;
                          tabscale = fr->atf_tabs[mdatoms->tf_table_index[iatom]].scale;
                      }
                      else {
                      /* just on component*/
-                        ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].tab;
+                        ATFtab = fr->atf_tabs[DEFAULT_TF_TABLE].data;
                          tabscale = fr->atf_tabs[DEFAULT_TF_TABLE].scale;
                      }
                      
diff --git a/src/mdlib/domdec.c b/src/mdlib/domdec.c

index 8eba3d7dfabc11924a4a1d6991ce85e1323d9dac..320002ef6cb2dccdecb27d13624a74073e79afc5 100644 (file)
--- a/src/mdlib/domdec.c
+++ b/src/mdlib/domdec.c
@@ -252,7 +252,7 @@ typedef struct gmx_domdec_comm
  
      /* Cell sizes for static load balancing, first index cartesian */
      real **slb_frac;
-    
+
      /* The width of the communicated boundaries */
      real cutoff_mbody;
      real cutoff;
@@ -5009,7 +5009,7 @@ static double force_flop_count(t_nrnb *nrnb)
      const char *name;
  
      sum = 0;
-    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
+    for(i=0; i<eNR_NBKERNEL_FREE_ENERGY; i++)
      {
          /* To get closer to the real timings, we half the count
           * for the normal loops and again half it for water loops.
diff --git a/src/mdlib/force.c b/src/mdlib/force.c

index 3d3699bad0a01b492d94d16484055bf735abe26a..d967c87c397ce003e90ca16119ce18a71d6b523a 100644 (file)
--- a/src/mdlib/force.c
+++ b/src/mdlib/force.c
@@ -79,9 +79,7 @@ void ns(FILE *fp,
          real       *dvdlambda,
          gmx_grppairener_t *grppener,
          gmx_bool       bFillGrid,
-        gmx_bool       bDoLongRange,
-        gmx_bool       bDoForces,
-        rvec       *f)
+        gmx_bool       bDoLongRangeNS)
  {
    char   *ptr;
    int    nsearch;
@@ -97,8 +95,7 @@ void ns(FILE *fp,
  
      nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
                                  lambda,dvdlambda,grppener,
-                                bFillGrid,bDoLongRange,
-                                bDoForces,f);
+                                bFillGrid,bDoLongRangeNS);
    if (debug)
      fprintf(debug,"nsearch = %d\n",nsearch);
  
@@ -146,6 +143,7 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                         t_grpopts  *opts,
                         rvec       x[],      history_t  *hist,
                         rvec       f[],
+                       rvec       f_longrange[],
                         gmx_enerdata_t *enerd,
                         t_fcdata   *fcd,
                         gmx_mtop_t     *mtop,
@@ -258,18 +256,25 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
      if (flags & GMX_FORCE_NONBONDED)
      {
          donb_flags = 0;
+        /* Add short-range interactions */
+        donb_flags |= GMX_NONBONDED_DO_SR;
+
          if (flags & GMX_FORCE_FORCES)
          {
-            donb_flags |= GMX_DONB_FORCES;
+            donb_flags |= GMX_NONBONDED_DO_FORCE;
+        }
+        if (flags & GMX_FORCE_ENERGY)
+        {
+            donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
+        }
+        if (flags & GMX_FORCE_DO_LR)
+        {
+            donb_flags |= GMX_NONBONDED_DO_LR;
          }
  
          wallcycle_sub_start(wcycle, ewcsNONBONDED);
-        do_nonbonded(cr,fr,x,f,md,excl,
-                    fr->bBHAM ?
-                    enerd->grpp.ener[egBHAMSR] :
-                    enerd->grpp.ener[egLJSR],
-                    enerd->grpp.ener[egCOULSR],
-                    enerd->grpp.ener[egGB],box_size,nrnb,
+        do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
+                    &enerd->grpp,box_size,nrnb,
                      lambda,dvdl_nb,-1,-1,donb_flags);
          wallcycle_sub_stop(wcycle, ewcsNONBONDED);
      }
@@ -289,14 +294,10 @@ void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
                  lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
              }
              reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
-            do_nonbonded(cr,fr,x,f,md,excl,
-                         fr->bBHAM ?
-                         ed_lam.grpp.ener[egBHAMSR] :
-                         ed_lam.grpp.ener[egLJSR],
-                         ed_lam.grpp.ener[egCOULSR],
-                         enerd->grpp.ener[egGB], box_size,nrnb,
+            do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
+                         &(ed_lam.grpp), box_size,nrnb,
                           lam_i,dvdl_dum,-1,-1,
-                         GMX_DONB_FOREIGNLAMBDA);
+                         GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
              sum_epot(&ir->opts,&ed_lam);
              enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
          }
diff --git a/src/mdlib/forcerec.c b/src/mdlib/forcerec.c

index bf7b64eadcc85ca6537a548f1f3492fe9cab4327..a677e916d0279f0cfaba86ed21c85486c9c078f9 100644 (file)
--- a/src/mdlib/forcerec.c
+++ b/src/mdlib/forcerec.c
@@ -27,7 +27,7 @@
   * 
   * To help us fund GROMACS development, we humbly ask that you cite
   * the papers on the package - you can find them in the top README file.
- * 
+ *
   * For more info, check our website at http://www.gromacs.org
   * 
   * And Hey:
@@ -102,10 +102,10 @@ static void pr_nbfp(FILE *fp,real *nbfp,gmx_bool bBHAM,int atnr)
        fprintf(fp,"%2d - %2d",i,j);
        if (bBHAM)
         fprintf(fp,"  a=%10g, b=%10g, c=%10g\n",BHAMA(nbfp,atnr,i,j),
-               BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j));
+               BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j)/6.0);
        else
-       fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j),
-               C12(nbfp,atnr,i,j));
+       fprintf(fp,"  c6=%10g, c12=%10g\n",C6(nbfp,atnr,i,j)/6.0,
+            C12(nbfp,atnr,i,j)/12.0);
      }
    }
  }
@@ -121,9 +121,10 @@ static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
      snew(nbfp,3*atnr*atnr);
      for(i=k=0; (i<atnr); i++) {
        for(j=0; (j<atnr); j++,k++) {
-       BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
-       BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
-       BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c;
+          BHAMA(nbfp,atnr,i,j) = idef->iparams[k].bham.a;
+          BHAMB(nbfp,atnr,i,j) = idef->iparams[k].bham.b;
+          /* nbfp now includes the 6.0 derivative prefactor */
+          BHAMC(nbfp,atnr,i,j) = idef->iparams[k].bham.c*6.0;
        }
      }
    }
@@ -131,11 +132,13 @@ static real *mk_nbfp(const gmx_ffparams_t *idef,gmx_bool bBHAM)
      snew(nbfp,2*atnr*atnr);
      for(i=k=0; (i<atnr); i++) {
        for(j=0; (j<atnr); j++,k++) {
-       C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6;
-       C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12;
+          /* nbfp now includes the 6.0/12.0 derivative prefactors */
+          C6(nbfp,atnr,i,j)   = idef->iparams[k].lj.c6*6.0;
+          C12(nbfp,atnr,i,j)  = idef->iparams[k].lj.c12*12.0;
        }
      }
    }
+
    return nbfp;
  }
  
@@ -357,10 +360,9 @@ check_solvent_cg(const gmx_moltype_t   *molt,
          /* So, is it an SPC?
           * For this we require thatn all atoms have charge, 
           * the charges on atom 2 & 3 should be the same, and only
-         * atom 1 should have VdW.
+         * atom 1 might have VdW.
           */
-        if (has_vdw[0] == TRUE && 
-            has_vdw[1] == FALSE &&
+        if (has_vdw[1] == FALSE &&
              has_vdw[2] == FALSE &&
              tmp_charge[0]  != 0 &&
              tmp_charge[1]  != 0 &&
@@ -383,10 +385,9 @@ check_solvent_cg(const gmx_moltype_t   *molt,
      {
          /* Or could it be a TIP4P?
           * For this we require thatn atoms 2,3,4 have charge, but not atom 1. 
-         * Only atom 1 should have VdW.
+         * Only atom 1 mght have VdW.
           */
-        if(has_vdw[0] == TRUE && 
-           has_vdw[1] == FALSE &&
+        if(has_vdw[1] == FALSE &&
             has_vdw[2] == FALSE &&
             has_vdw[3] == FALSE &&
             tmp_charge[0]  == 0 &&
@@ -922,10 +923,12 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                          npair_ij = tmpi*(tmpi - 1)/2;
                      }
                      if (bBHAM) {
-                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj);
+                        /* nbfp now includes the 6.0 derivative prefactor */
+                        csix    += npair_ij*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                      } else {
-                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj);
-                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj);
+                        /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                        csix    += npair_ij*   C6(nbfp,ntp,tpi,tpj)/6.0;
+                        ctwelve += npair_ij*  C12(nbfp,ntp,tpi,tpj)/12.0;
                      }
                      npair += npair_ij;
                  }
@@ -965,10 +968,12 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                                  tpj = atoms->atom[k].typeB;
                              }
                              if (bBHAM) {
-                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj);
+                                /* nbfp now includes the 6.0 derivative prefactor */
+                               csix -= nmol*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                              } else {
-                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj);
-                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj);
+                                /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                                csix    -= nmol*C6 (nbfp,ntp,tpi,tpj)/6.0;
+                                ctwelve -= nmol*C12(nbfp,ntp,tpi,tpj)/12.0;
                              }
                              nexcl += nmol;
                          }
@@ -1020,12 +1025,14 @@ void set_avcsixtwelve(FILE *fplog,t_forcerec *fr,const gmx_mtop_t *mtop)
                          }
                          if (bBHAM)
                          {
-                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj);
+                            /* nbfp now includes the 6.0 derivative prefactor */
+                            csix    += nmolc*BHAMC(nbfp,ntp,tpi,tpj)/6.0;
                          }
                          else
                          {
-                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj);
-                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj);
+                            /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                            csix    += nmolc*C6 (nbfp,ntp,tpi,tpj)/6.0;
+                            ctwelve += nmolc*C12(nbfp,ntp,tpi,tpj)/12.0;
                          }
                          npair += nmolc;
                      }
@@ -1121,42 +1128,61 @@ static void set_bham_b_max(FILE *fplog,t_forcerec *fr,
  
  static void make_nbf_tables(FILE *fp,const output_env_t oenv,
                              t_forcerec *fr,real rtab,
-                           const t_commrec *cr,
-                           const char *tabfn,char *eg1,char *eg2,
-                           t_nblists *nbl)
+                            const t_commrec *cr,
+                            const char *tabfn,char *eg1,char *eg2,
+                            t_nblists *nbl)
  {
-  char buf[STRLEN];
-  int i,j;
+    char buf[STRLEN];
+    int i,j;
  
-  if (tabfn == NULL) {
-    if (debug)
-      fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
-    return;
-  }
-    
-  sprintf(buf,"%s",tabfn);
-  if (eg1 && eg2)
+    if (tabfn == NULL) {
+        if (debug)
+            fprintf(debug,"No table file name passed, can not read table, can not do non-bonded interactions\n");
+        return;
+    }
+
+    sprintf(buf,"%s",tabfn);
+    if (eg1 && eg2)
      /* Append the two energy group names */
-    sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
-           eg1,eg2,ftp2ext(efXVG));
-  nbl->tab = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
-  /* Copy the contents of the table to separate coulomb and LJ tables too,
-   * to improve cache performance.
-   */
-
-  /* For performance reasons we want
-   * the table data to be aligned to 16-byte. The pointer could be freed
-   * but currently isn't.
-   */
-  snew_aligned(nbl->vdwtab,8*(nbl->tab.n+1),16);
-  snew_aligned(nbl->coultab,4*(nbl->tab.n+1),16);
-  
-  for(i=0; i<=nbl->tab.n; i++) {
-    for(j=0; j<4; j++)
-      nbl->coultab[4*i+j] = nbl->tab.tab[12*i+j];
-    for(j=0; j<8; j++)
-      nbl->vdwtab [8*i+j] = nbl->tab.tab[12*i+4+j];
-  }
+        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1,"_%s_%s.%s",
+                eg1,eg2,ftp2ext(efXVG));
+    nbl->table_elec_vdw = make_tables(fp,oenv,fr,MASTER(cr),buf,rtab,0);
+    /* Copy the contents of the table to separate coulomb and LJ tables too,
+     * to improve cache performance.
+     */
+    /* For performance reasons we want
+     * the table data to be aligned to 16-byte. The pointers could be freed
+     * but currently aren't.
+     */
+    nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
+    nbl->table_elec.format = nbl->table_elec_vdw.format;
+    nbl->table_elec.r = nbl->table_elec_vdw.r;
+    nbl->table_elec.n = nbl->table_elec_vdw.n;
+    nbl->table_elec.scale = nbl->table_elec_vdw.scale;
+    nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
+    nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
+    nbl->table_elec.ninteractions = 1;
+    nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
+    snew_aligned(nbl->table_elec.data,nbl->table_elec.stride*(nbl->table_elec.n+1),16);
+
+    nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
+    nbl->table_vdw.format = nbl->table_elec_vdw.format;
+    nbl->table_vdw.r = nbl->table_elec_vdw.r;
+    nbl->table_vdw.n = nbl->table_elec_vdw.n;
+    nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
+    nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
+    nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
+    nbl->table_vdw.ninteractions = 2;
+    nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
+    snew_aligned(nbl->table_vdw.data,nbl->table_vdw.stride*(nbl->table_vdw.n+1),16);
+
+    for(i=0; i<=nbl->table_elec_vdw.n; i++)
+    {
+        for(j=0; j<4; j++)
+            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
+        for(j=0; j<8; j++)
+            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
+    }
  }
  
  static void count_tables(int ftype1,int ftype2,const gmx_mtop_t *mtop,
@@ -1494,73 +1520,54 @@ static void pick_nbnxn_kernel(FILE *fp,
  }
  
  
-static void init_verlet_ewald_f_table(interaction_const_t *ic,
-                                      int verlet_kernel_type)
+static void init_ewald_f_table(interaction_const_t *ic,
+                               int cutoff_scheme,
+                               int  verlet_kernel_type,
+                               real rtab)
  {
-    if (nbnxn_kernel_pairlist_simple(verlet_kernel_type))
+    real maxr;
+
+    if (cutoff_scheme==ecutsGROUP || nbnxn_kernel_pairlist_simple(verlet_kernel_type))
      {
          /* With a spacing of 0.0005 we are at the force summation accuracy
           * for the SSE kernels for "normal" atomistic simulations.
           */
          ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
                                                     ic->rcoulomb);
-        ic->tabq_size  = (int)(ic->rcoulomb*ic->tabq_scale) + 2;
-#ifndef GMX_DOUBLE
-        ic->tabq_format = tableformatFDV0;
-#else
-        ic->tabq_format = tableformatF;
-#endif
+        
+        maxr = (rtab>ic->rcoulomb) ? rtab : ic->rcoulomb;
+        ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
      }
      else
      {
          ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
          /* Subtract 2 iso 1 to avoid access out of range due to rounding */
          ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
-        if (verlet_kernel_type == nbk8x8x8_CUDA)
-        {
-            /* This case is handled in the nbnxn CUDA module */
-            ic->tabq_format = tableformatNONE;
-        }
-        else
-        {
-            ic->tabq_format = tableformatF;
-        }
-    }
-
-    switch (ic->tabq_format)
-    {
-    case tableformatNONE:
-        break;
-    case tableformatF:
-        sfree_aligned(ic->tabq_coul_F);
-        sfree_aligned(ic->tabq_coul_V);
-        snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
-        snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
-        table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,
-                                    ic->tabq_size,ic->tabq_format,
-                                    1/ic->tabq_scale,ic->ewaldcoeff);
-        break;
-    case tableformatFDV0:
-        sfree_aligned(ic->tabq_coul_F);
-        snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
-        table_spline3_fill_ewald_lr(ic->tabq_coul_FDV0,NULL,
-                                    ic->tabq_size,ic->tabq_format,
-                                    1/ic->tabq_scale,ic->ewaldcoeff);
-        break;
-    default:
-        gmx_incons("Unknown table format");
      }
+
+    sfree_aligned(ic->tabq_coul_FDV0);
+    sfree_aligned(ic->tabq_coul_F);
+    sfree_aligned(ic->tabq_coul_V);
+
+    /* Create the original table data in FDV0 */
+    snew_aligned(ic->tabq_coul_FDV0,ic->tabq_size*4,16);
+    snew_aligned(ic->tabq_coul_F,ic->tabq_size,16);
+    snew_aligned(ic->tabq_coul_V,ic->tabq_size,16);
+    table_spline3_fill_ewald_lr(ic->tabq_coul_F,ic->tabq_coul_V,ic->tabq_coul_FDV0,
+                                ic->tabq_size,1/ic->tabq_scale,ic->ewaldcoeff);
  }
  
  void init_interaction_const_tables(FILE *fp, 
                                     interaction_const_t *ic,
-                                   int verlet_kernel_type)
+                                   int cutoff_scheme,
+                                   int verlet_kernel_type,
+                                   real rtab)
  {
      real spacing;
  
      if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
      {
-        init_verlet_ewald_f_table(ic,verlet_kernel_type);
+        init_ewald_f_table(ic,cutoff_scheme,verlet_kernel_type,rtab);
  
          if (fp != NULL)
          {
@@ -1572,17 +1579,24 @@ void init_interaction_const_tables(FILE *fp,
  
  void init_interaction_const(FILE *fp, 
                              interaction_const_t **interaction_const,
-                            const t_forcerec *fr)
+                            const t_forcerec *fr,
+                            real  rtab)
  {
      interaction_const_t *ic;
  
      snew(ic, 1);
  
-    ic->rlist       = fr->rlist;
+    /* Just allocate something so we can free it */
+    snew_aligned(ic->tabq_coul_FDV0,16,16);
+    snew_aligned(ic->tabq_coul_F,16,16);
+    snew_aligned(ic->tabq_coul_V,16,16);
  
+    ic->rlist       = fr->rlist;
+    ic->rlistlong   = fr->rlistlong;
+    
      /* Lennard-Jones */
      ic->rvdw        = fr->rvdw;
-    if (fr->vdw_pot_shift)
+    if (fr->vdw_modifier==eintmodPOTSHIFT)
      {
          ic->sh_invrc6 = pow(ic->rvdw,-6.0);
      }
@@ -1599,7 +1613,7 @@ void init_interaction_const(FILE *fp,
  
      /* Ewald */
      ic->ewaldcoeff  = fr->ewaldcoeff;
-    if (fr->coul_pot_shift)
+    if (fr->coulomb_modifier==eintmodPOTSHIFT)
      {
          ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
      }
@@ -1620,7 +1634,7 @@ void init_interaction_const(FILE *fp,
          /* For plain cut-off we might use the reaction-field kernels */
          ic->epsilon_rf = ic->epsilon_r;
          ic->k_rf       = 0;
-        if (fr->coul_pot_shift)
+        if (fr->coulomb_modifier==eintmodPOTSHIFT)
          {
              ic->c_rf   = 1/ic->rcoulomb;
          }
@@ -1652,10 +1666,14 @@ void init_interaction_const(FILE *fp,
          nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
      }
  
-    if (fr->cutoff_scheme == ecutsVERLET)
+    if(fr->cutoff_scheme == ecutsGROUP)
+    {
+        init_interaction_const_tables(fp,ic,ecutsGROUP,-1,rtab);
+    }
+    else
      {
          assert(fr->nbv != NULL && fr->nbv->grp != NULL);
-        init_interaction_const_tables(fp,ic,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type);
+        init_interaction_const_tables(fp,ic,ecutsVERLET,fr->nbv->grp[fr->nbv->ngrp-1].kernel_type,rtab);
      }
  }
  
@@ -1930,15 +1948,25 @@ void init_forcerec(FILE *fp,
      }
  
      bGenericKernelOnly = FALSE;
+
+    /* We now check in the NS code whether a particular combination of interactions
+     * can be used with water optimization, and disable it if that is not the case.
+     */
+
      if (getenv("GMX_NB_GENERIC") != NULL)
      {
          if (fp != NULL)
          {
              fprintf(fp,
                      "Found environment variable GMX_NB_GENERIC.\n"
-                    "Disabling interaction-specific nonbonded kernels.\n\n");
+                    "Disabling all interaction-specific nonbonded kernels, will only\n"
+                    "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
          }
          bGenericKernelOnly = TRUE;
+    }
+
+    if (bGenericKernelOnly==TRUE)
+    {
          bNoSolvOpt         = TRUE;
      }
  
@@ -1953,6 +1981,8 @@ void init_forcerec(FILE *fp,
          }
      }
  
+    fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+
      /* Check if we can/should do all-vs-all kernels */
      fr->bAllvsAll       = can_use_allvsall(ir,mtop,FALSE,NULL,NULL);
      fr->AllvsAll_work   = NULL;
@@ -2001,6 +2031,7 @@ void init_forcerec(FILE *fp,
              fr->bMolPBC = dd_bonded_molpbc(cr->dd,fr->ePBC);
          }
      }
+
      fr->rc_scaling = ir->refcoord_scaling;
      copy_rvec(ir->posres_com,fr->posres_com);
      copy_rvec(ir->posres_comB,fr->posres_comB);
@@ -2009,9 +2040,77 @@ void init_forcerec(FILE *fp,
      fr->eeltype    = ir->coulombtype;
      fr->vdwtype    = ir->vdwtype;
  
-    fr->coul_pot_shift = (ir->coulomb_modifier == eintmodPOTSHIFT);
-    fr->vdw_pot_shift  = (ir->vdw_modifier     == eintmodPOTSHIFT);
-    
+    fr->coulomb_modifier = ir->coulomb_modifier;
+    fr->vdw_modifier     = ir->vdw_modifier;
+
+    /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
+    switch(fr->eeltype)
+    {
+        case eelCUT:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_COULOMB;
+            break;
+
+        case eelRF:
+        case eelGRF:
+        case eelRF_NEC:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+            break;
+
+        case eelRF_ZERO:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
+            fr->coulomb_modifier          = eintmodEXACTCUTOFF;
+            break;
+
+        case eelSWITCH:
+        case eelSHIFT:
+        case eelUSER:
+        case eelENCADSHIFT:
+        case eelPMESWITCH:
+        case eelPMEUSER:
+        case eelPMEUSERSWITCH:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+            break;
+
+        case eelPME:
+        case eelEWALD:
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
+            break;
+
+        default:
+            gmx_fatal(FARGS,"Unsupported electrostatic interaction: %s",eel_names[fr->eeltype]);
+            break;
+    }
+
+    /* Vdw: Translate from mdp settings to kernel format */
+    switch(fr->vdwtype)
+    {
+        case evdwCUT:
+            if(fr->bBHAM)
+            {
+                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
+            }
+            else
+            {
+                fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
+            }
+            break;
+
+        case evdwSWITCH:
+        case evdwSHIFT:
+        case evdwUSER:
+        case evdwENCADSHIFT:
+            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+            break;
+
+        default:
+            gmx_fatal(FARGS,"Unsupported vdw interaction: %s",evdw_names[fr->vdwtype]);
+            break;
+    }
+
+    /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
+    fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
+    fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
+
      fr->bTwinRange = fr->rlistlong > fr->rlist;
      fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype==eelEWALD);
      
@@ -2021,8 +2120,33 @@ void init_forcerec(FILE *fp,
      {
          fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
                            !gmx_within_tol(fr->reppow,12.0,10*GMX_DOUBLE_EPS));
-        fr->bcoultab   = (!(fr->eeltype == eelCUT || EEL_RF(fr->eeltype)) ||
-                          fr->eeltype == eelRF_ZERO);
+        /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
+        fr->bcoultab   = !(fr->eeltype == eelCUT ||
+                           fr->eeltype == eelEWALD ||
+                           fr->eeltype == eelPME ||
+                           fr->eeltype == eelRF ||
+                           fr->eeltype == eelRF_ZERO);
+
+        /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
+         * going to be faster to tabulate the interaction than calling the generic kernel.
+         */
+        if(fr->nbkernel_elec_modifier==eintmodPOTSWITCH && fr->nbkernel_vdw_modifier==eintmodPOTSWITCH)
+        {
+            if((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
+            {
+                fr->bcoultab = TRUE;
+            }
+        }
+        else if((fr->nbkernel_elec_modifier==eintmodPOTSHIFT && fr->nbkernel_vdw_modifier==eintmodPOTSHIFT) ||
+                ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
+                  fr->nbkernel_elec_modifier==eintmodEXACTCUTOFF &&
+                  (fr->nbkernel_vdw_modifier==eintmodPOTSWITCH || fr->nbkernel_vdw_modifier==eintmodPOTSHIFT))))
+        {
+            if(fr->rcoulomb != fr->rvdw)
+            {
+                fr->bcoultab = TRUE;
+            }
+        }
  
          if (getenv("GMX_REQUIRE_TABLES"))
          {
@@ -2035,6 +2159,17 @@ void init_forcerec(FILE *fp,
              fprintf(fp,"Table routines are used for coulomb: %s\n",bool_names[fr->bcoultab]);
              fprintf(fp,"Table routines are used for vdw:     %s\n",bool_names[fr->bvdwtab ]);
          }
+
+        if(fr->bvdwtab==TRUE)
+        {
+            fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
+            fr->nbkernel_vdw_modifier    = eintmodNONE;
+        }
+        if(fr->bcoultab==TRUE)
+        {
+            fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
+            fr->nbkernel_elec_modifier    = eintmodNONE;
+        }
      }
  
      if (ir->cutoff_scheme == ecutsVERLET)
@@ -2126,7 +2261,6 @@ void init_forcerec(FILE *fp,
      
      if (fr->nbfp == NULL) {
          fr->ntype = mtop->ffparams.atnr;
-        fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
          fr->nbfp  = mk_nbfp(&mtop->ffparams,fr->bBHAM);
      }
      
@@ -2236,10 +2370,10 @@ void init_forcerec(FILE *fp,
       * A little unnecessary to make both vdw and coul tables sometimes,
       * but what the heck... */
      
-    bTab = fr->bcoultab || fr->bvdwtab;
+    bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
  
      bSep14tab = ((!bTab || fr->eeltype!=eelCUT || fr->vdwtype!=evdwCUT ||
-                  fr->bBHAM) &&
+                  fr->bBHAM || fr->bEwald) &&
                   (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
                    gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0 ||
                    gmx_mtop_ftype_count(mtop,F_LJC_PAIRS_NB) > 0));
@@ -2284,7 +2418,7 @@ void init_forcerec(FILE *fp,
          if (bNormalnblists) {
              make_nbf_tables(fp,oenv,fr,rtab,cr,tabfn,NULL,NULL,&fr->nblists[0]);
              if (!bSep14tab)
-                fr->tab14 = fr->nblists[0].tab;
+                fr->tab14 = fr->nblists[0].table_elec_vdw;
              m = 1;
          } else {
              m = 0;
@@ -2395,14 +2529,16 @@ void init_forcerec(FILE *fp,
      
      /* Initialize neighbor search */
      init_ns(fp,cr,&fr->ns,fr,mtop,box);
-    
+
      if (cr->duty & DUTY_PP)
      {
-        gmx_setup_kernels(fp,fr,bGenericKernelOnly);
-        if (ir->bAdress)
+        gmx_nonbonded_setup(fp,fr,bGenericKernelOnly);
+    /*
+     if (ir->bAdress)
          {
              gmx_setup_adress_kernels(fp,bGenericKernelOnly);
          }
+     */
      }
  
      /* Initialize the thread working data for bonded interactions */
@@ -2418,13 +2554,10 @@ void init_forcerec(FILE *fp,
          }
  
          init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
-
-        /* initialize interaction constants
-         * TODO should be moved out during modularization.
-         */
-        init_interaction_const(fp, &fr->ic, fr);
      }
  
+    /* fr->ic is used both by verlet and group kernels (to some extent) now */
+    init_interaction_const(fp, &fr->ic, fr, rtab);
      if (ir->eDispCorr != edispcNO)
      {
          calc_enervirdiff(fp,ir->eDispCorr,fr);
@@ -2447,7 +2580,7 @@ void pr_forcerec(FILE *fp,t_forcerec *fr,t_commrec *cr)
    /*pr_int(fp,fr->cg0);
      pr_int(fp,fr->hcg);*/
    for(i=0; i<fr->nnblists; i++)
-    pr_int(fp,fr->nblists[i].tab.n);
+    pr_int(fp,fr->nblists[i].table_elec_vdw.n);
    pr_real(fp,fr->rcoulomb_switch);
    pr_real(fp,fr->rcoulomb);
    
diff --git a/src/mdlib/genborn.c b/src/mdlib/genborn.c

index 0cd82072ef3e517dca49d57e02e9353888ec5234..4a0905cbe3e355f24e9186c616c06993bb2d5540 100644 (file)
--- a/src/mdlib/genborn.c
+++ b/src/mdlib/genborn.c
@@ -1120,7 +1120,8 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
  #else
              genborn_allvsall_calc_still_radii(fr,md,born,top,x[0],cr,&fr->AllvsAll_workgb);
  #endif
-            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,cnt);
+            /* 13 flops in outer loop, 47 flops in inner loop */
+            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_STILL,md->homenr*13+cnt*47);
          }
          else if(ir->gb_algorithm==egbHCT || ir->gb_algorithm==egbOBC)
          {
@@ -1140,14 +1141,13 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
  #else
              genborn_allvsall_calc_hct_obc_radii(fr,md,born,ir->gb_algorithm,top,x[0],cr,&fr->AllvsAll_workgb);
  #endif
-            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,cnt);
+            /* 24 flops in outer loop, 183 in inner */
+            inc_nrnb(nrnb,eNR_BORN_AVA_RADII_HCT_OBC,md->homenr*24+cnt*183);
          }
          else
          {
              gmx_fatal(FARGS,"Bad gb algorithm for all-vs-all interactions");
          }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
-
          return 0;
      }
      
@@ -1279,17 +1279,18 @@ int calc_gb_rad(t_commrec *cr, t_forcerec *fr, t_inputrec *ir,gmx_localtop_t *to
          switch(ir->gb_algorithm)
          {
              case egbSTILL:
-                inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nrj);
+                /* 17 flops per outer loop iteration, 47 flops per inner loop */
+                inc_nrnb(nrnb,eNR_BORN_RADII_STILL,nl->nri*17+nl->nrj*47);
                  break;
              case egbHCT:
              case egbOBC:
-                inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nrj);
+                /* 61 (assuming 10 for tanh) flops for outer loop iteration, 183 flops per inner loop */
+                inc_nrnb(nrnb,eNR_BORN_RADII_HCT_OBC,nl->nri*61+nl->nrj*183);
                  break;
                  
              default:
                  break;
          }
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,nl->nri);
      }
      
      return 0;        
@@ -1643,7 +1644,7 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
    
    /* Calculate the bonded GB-interactions using either table or analytical formula */
      enerd->term[F_GBPOL]       += gb_bonds_tab(x,f,fr->fshift, md->chargeA,&(fr->gbtabscale),
-                                     fr->invsqrta,fr->dvda,fr->gbtab.tab,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
+                                     fr->invsqrta,fr->dvda,fr->gbtab.data,idef,born->epsilon_r,born->gb_epsilon_solvent, fr->epsfac, pbc_null, graph);
      
      /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
      enerd->term[F_GBPOL]       += calc_gb_selfcorrections(cr,born->nr,md->chargeA, born, fr->dvda, md, fr->epsfac);         
@@ -1678,8 +1679,8 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
          genborn_allvsall_calc_chainrule(fr,md,born,x[0],f[0],gb_algorithm,fr->AllvsAll_workgb);
  #endif
          cnt = md->homenr*(md->nr/2+1);
-        inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,cnt);
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,md->homenr);
+        /* 9 flops for outer loop, 15 for inner */
+        inc_nrnb(nrnb,eNR_BORN_AVA_CHAINRULE,md->homenr*9+cnt*15);
          return;
      }
      
@@ -1706,9 +1707,8 @@ calc_gb_forces(t_commrec *cr, t_mdatoms *md, gmx_genborn_t *born, gmx_localtop_t
  
      if(!fr->bAllvsAll)
      {
-        inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nrj);
-        inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fr->gblist.nri);
-
+        /* 9 flops for outer loop, 15 for inner */
+        inc_nrnb(nrnb,eNR_BORN_CHAINRULE,fr->gblist.nri*9+fr->gblist.nrj*15);
      }
  }
  
diff --git a/src/mdlib/minimize.c b/src/mdlib/minimize.c

index d14f434eedeb83f59610d324d04b818a5a71678f..5c6aec2a7970ed55860f5268dab5bc27ca9ac428 100644 (file)
--- a/src/mdlib/minimize.c
+++ b/src/mdlib/minimize.c
@@ -704,7 +704,7 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
               ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
               GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
               GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
+             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
  
      /* Clear the unused shake virial and pressure */
      clear_mat(shake_vir);
diff --git a/src/mdlib/nbnxn_atomdata.c b/src/mdlib/nbnxn_atomdata.c

index 0441011087ea345e686a047b20c9093fad2146ff..fb3614274ca99f3921a390d054cc80bedb71c193 100644 (file)
--- a/src/mdlib/nbnxn_atomdata.c
+++ b/src/mdlib/nbnxn_atomdata.c
@@ -506,14 +506,16 @@ void nbnxn_atomdata_init(FILE *fp,
          {
              if (i < ntype && j < ntype)
              {
-                /* We store the prefactor in the derivative of the potential
-                 * in the parameter to avoid multiplications in the inner loop.
+                /* fr->nbfp has been updated, so that array too now stores c6/c12 including
+                 * the 6.0/12.0 prefactors to save 2 flops in the most common case (force-only).
                   */
                  c6  = nbfp[(i*ntype+j)*2  ];
                  c12 = nbfp[(i*ntype+j)*2+1];
-                nbat->nbfp[(i*nbat->ntype+j)*2  ] =  6.0*c6;
-                nbat->nbfp[(i*nbat->ntype+j)*2+1] = 12.0*c12;
-
+                nbat->nbfp[(i*nbat->ntype+j)*2  ] = c6;
+                nbat->nbfp[(i*nbat->ntype+j)*2+1] = c12;
+                c6  /= 6.0;
+                c12 /= 12.0;
+                
                  bCombGeom = bCombGeom &&
                      gmx_within_tol(c6*c6  ,nbfp[(i*ntype+i)*2  ]*nbfp[(j*ntype+j)*2  ],tol) &&
                      gmx_within_tol(c12*c12,nbfp[(i*ntype+i)*2+1]*nbfp[(j*ntype+j)*2+1],tol);
diff --git a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index a97dd18d82026b3ee3c831dbe98af081503b9505..a323b95c3e7ffab9ebe10e3d1fbc9346b5d2958f 100644 (file)
--- a/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -89,7 +89,7 @@ static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp)
  
      pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp));
  
-    table_spline3_fill_ewald_lr(ftmp, NULL, tabsize, tableformatF,
+    table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize,
                                  1/tabscale, nbp->ewald_beta);
  
      /* If the table pointer == NULL the table is generated the first time =>
diff --git a/src/mdlib/ns.c b/src/mdlib/ns.c

index 571e7047c78ebb574d99daf8e671a13628e9bc91..10dc410b49b96e96a1218f37193b54451b414f3d 100644 (file)
--- a/src/mdlib/ns.c
+++ b/src/mdlib/ns.c
@@ -17,7 +17,7 @@
   * modify it under the terms of the GNU General Public License
   * as published by the Free Software Foundation; either version 2
   * of the License, or (at your option) any later version.
- * 
+ *
   * If you want to redistribute modifications, please consider that
   * scientific software is very special. Version control is crucial -
   * bugs must be traceable. We will be happy to consider code for
@@ -94,11 +94,11 @@ static void reallocate_nblist(t_nblist *nl)
  {
      if (gmx_debug_at)
      {
-        fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
-                nl->il_code,nl->maxnri); 
+        fprintf(debug,"reallocating neigborlist (ielec=%d, ivdw=%d, igeometry=%d, free_energy=%d), maxnri=%d\n",
+                nl->ielec,nl->ivdw,nl->igeometry,nl->free_energy,nl->maxnri);
      }
      srenew(nl->iinr,   nl->maxnri);
-    if (nl->enlist == enlistCG_CG)
+    if (nl->igeometry == GMX_NBLIST_GEOMETRY_CG_CG)
      {
          srenew(nl->iinr_end,nl->maxnri);
      }
@@ -107,60 +107,17 @@ static void reallocate_nblist(t_nblist *nl)
      srenew(nl->jindex, nl->maxnri+1);
  }
  
-/* ivdw/icoul are used to determine the type of interaction, so we
- * can set an innerloop index here. The obvious choice for this would have
- * been the vdwtype/coultype values in the forcerecord, but unfortunately 
- * those types are braindead - for instance both Buckingham and normal 
- * Lennard-Jones use the same value (evdwCUT), and a separate gmx_boolean variable
- * to determine which interaction is used. There is further no special value
- * for 'no interaction'. For backward compatibility with old TPR files we won't
- * change this in the 3.x series, so when calling this routine you should use:
- *
- * icoul=0 no coulomb interaction
- * icoul=1 cutoff standard coulomb
- * icoul=2 reaction-field coulomb
- * icoul=3 tabulated coulomb
- *
- * ivdw=0 no vdw interaction
- * ivdw=1 standard L-J interaction
- * ivdw=2 Buckingham
- * ivdw=3 tabulated vdw.
- *
- * Kind of ugly, but it works.
- */
-static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
+
+static void init_nblist(FILE *log, t_nblist *nl_sr,t_nblist *nl_lr,
                          int maxsr,int maxlr,
-                        int ivdw, int icoul, 
-                        gmx_bool bfree, int enlist)
+                        int ivdw, int ivdwmod,
+                        int ielec, int ielecmod,
+                        gmx_bool bfree, int igeometry)
  {
      t_nblist *nl;
      int      homenr;
      int      i,nn;
      
-    int inloop[20] =
-    { 
-        eNR_NBKERNEL_NONE,
-        eNR_NBKERNEL010,
-        eNR_NBKERNEL020,
-        eNR_NBKERNEL030,
-        eNR_NBKERNEL100,
-        eNR_NBKERNEL110,
-        eNR_NBKERNEL120,
-        eNR_NBKERNEL130,
-        eNR_NBKERNEL200,
-        eNR_NBKERNEL210,
-        eNR_NBKERNEL220,
-        eNR_NBKERNEL230,
-        eNR_NBKERNEL300,
-        eNR_NBKERNEL310,
-        eNR_NBKERNEL320,
-        eNR_NBKERNEL330,
-        eNR_NBKERNEL400,
-        eNR_NBKERNEL410,
-        eNR_NBKERNEL_NONE,
-        eNR_NBKERNEL430
-    };
-  
      for(i=0; (i<2); i++)
      {
          nl     = (i == 0) ? nl_sr : nl_lr;
@@ -170,47 +127,24 @@ static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
          {
              continue;
          }
-        
+
+
          /* Set coul/vdw in neighborlist, and for the normal loops we determine
           * an index of which one to call.
           */
-        nl->ivdw  = ivdw;
-        nl->icoul = icoul;
+        nl->ivdw        = ivdw;
+        nl->ivdwmod     = ivdwmod;
+        nl->ielec       = ielec;
+        nl->ielecmod    = ielecmod;
          nl->free_energy = bfree;
-    
+        nl->igeometry   = igeometry;
+
          if (bfree)
          {
-            nl->enlist  = enlistATOM_ATOM;
-            nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
+            nl->igeometry  = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
          }
-        else
-        {
-            nl->enlist = enlist;
-
-            nn = inloop[4*icoul + ivdw];
-            
-            /* solvent loops follow directly after the corresponding
-            * ordinary loops, in the order:
-            *
-            * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
-            *   
-            */
-            switch (enlist) {
-            case enlistATOM_ATOM:
-            case enlistCG_CG:
-                break;
-            case enlistSPC_ATOM:     nn += 1; break;
-            case enlistSPC_SPC:      nn += 2; break;
-            case enlistTIP4P_ATOM:   nn += 3; break;
-            case enlistTIP4P_TIP4P:  nn += 4; break;
-            }
-            
-            nl->il_code = nn;
-        }
-
-        if (debug)
-            fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
-                    nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
+        
+        gmx_nonbonded_set_kernel_pointers( (i==0) ? log : NULL,nl);
          
          /* maxnri is influenced by the number of shifts (maximum is 8)
           * and the number of energy groups.
@@ -229,6 +163,13 @@ static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
          nl->jindex      = NULL;
          reallocate_nblist(nl);
          nl->jindex[0] = 0;
+
+        if(debug)
+        {
+            fprintf(debug,"Initiating neighbourlist (ielec=%d, ivdw=%d, free=%d) for %s interactions,\nwith %d SR, %d LR atoms.\n",
+                    nl->ielec,nl->ivdw,nl->free_energy,gmx_nblist_geometry_names[nl->igeometry],maxsr,maxlr);
+        }
+
  #ifdef GMX_THREAD_SHM_FDECOMP
          nl->counter = 0;
          snew(nl->mtx,1);
@@ -245,9 +186,9 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
      * cache trashing.
      */
     int maxsr,maxsr_wat,maxlr,maxlr_wat;
-   int icoul,icoulf,ivdw;
+   int ielec,ielecf,ivdw,ielecmod,ielecmodf,ivdwmod;
     int solvent;
-   int enlist_def,enlist_w,enlist_ww;
+   int igeometry_def,igeometry_w,igeometry_ww;
     int i;
     t_nblists *nbl;
  
@@ -274,46 +215,20 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
       maxlr = maxlr_wat = 0;
     }  
  
-   /* Determine the values for icoul/ivdw. */
-   /* Start with GB */
-   if(fr->bGB)
-   {
-       icoul=enbcoulGB;
-   }
-   else if (fr->bcoultab)
-   {
-       icoul = enbcoulTAB;
-   }
-   else if (EEL_RF(fr->eeltype))
-   {
-       icoul = enbcoulRF;
-   }
-   else 
-   {
-       icoul = enbcoulOOR;
-   }
-   
-   if (fr->bvdwtab)
-   {
-       ivdw = enbvdwTAB;
-   }
-   else if (fr->bBHAM)
-   {
-       ivdw = enbvdwBHAM;
-   }
-   else 
-   {
-       ivdw = enbvdwLJ;
-   }
+   /* Determine the values for ielec/ivdw. */
+   ielec = fr->nbkernel_elec_interaction;
+   ivdw  = fr->nbkernel_vdw_interaction;
+   ielecmod = fr->nbkernel_elec_modifier;
+   ivdwmod  = fr->nbkernel_vdw_modifier;
  
     fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0);
     if (!fr->ns.bCGlist)
     {
-       enlist_def = enlistATOM_ATOM;
+       igeometry_def = GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE;
     }
     else
     {
-       enlist_def = enlistCG_CG;
+       igeometry_def = GMX_NBLIST_GEOMETRY_CG_CG;
         if (log != NULL)
         {
             fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
@@ -321,55 +236,75 @@ void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
     }
     
     if (fr->solvent_opt == esolTIP4P) {
-       enlist_w  = enlistTIP4P_ATOM;
-       enlist_ww = enlistTIP4P_TIP4P;
+       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER4_PARTICLE;
+       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER4_WATER4;
     } else {
-       enlist_w  = enlistSPC_ATOM;
-       enlist_ww = enlistSPC_SPC;
+       igeometry_w  = GMX_NBLIST_GEOMETRY_WATER3_PARTICLE;
+       igeometry_ww = GMX_NBLIST_GEOMETRY_WATER3_WATER3;
     }
  
     for(i=0; i<fr->nnblists; i++) 
     {
         nbl = &(fr->nblists[i]);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
-                   maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
-                   maxsr,maxlr,ivdw,0,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
-                   maxsr,maxlr,0,icoul,FALSE,enlist_def);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
-                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
-       init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
-                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
-       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
-                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
-       init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
-                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
+                   maxsr,maxlr,ivdw,ivdwmod,ielec,ielecmod,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
+                   maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
+                   maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod,FALSE,igeometry_def);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
+                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_w);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
+                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_w);
+       init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
+                   maxsr_wat,maxlr_wat,ivdw,ivdwmod,ielec,ielecmod, FALSE,igeometry_ww);
+       init_nblist(log,&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
+                   maxsr_wat,maxlr_wat,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielec,ielecmod, FALSE,igeometry_ww);
+
+       /* Did we get the solvent loops so we can use optimized water kernels? */
+       if(nbl->nlist_sr[eNL_VDWQQ_WATER].kernelptr_vf==NULL
+          || nbl->nlist_sr[eNL_QQ_WATER].kernelptr_vf==NULL
+#ifndef DISABLE_WATERWATER_NLIST
+          || nbl->nlist_sr[eNL_VDWQQ_WATERWATER].kernelptr_vf==NULL
+          || nbl->nlist_sr[eNL_QQ_WATERWATER].kernelptr_vf==NULL
+#endif
+          )
+       {
+           fr->solvent_opt = esolNO;
+           fprintf(log,"Note: The available nonbonded kernels do not support water optimization - disabling.\n");
+       }
         
         if (fr->efep != efepNO) 
         {
             if ((fr->bEwald) && (fr->sc_alphacoul > 0)) /* need to handle long range differently if using softcore */
             {
-               icoulf = enbcoulFEWALD;
+               ielecf = GMX_NBKERNEL_ELEC_EWALD;
+               ielecmodf = eintmodNONE;
             }
             else
             {
-               icoulf = icoul;
+               ielecf = ielec;
+               ielecmodf = ielecmod;
             }
  
-           init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
-                       maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
-           init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
-                       maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
-           init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
-                       maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
+           init_nblist(log,&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
+                       maxsr,maxlr,ivdw,ivdwmod,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+           init_nblist(log,&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
+                       maxsr,maxlr,ivdw,ivdwmod,GMX_NBKERNEL_ELEC_NONE,eintmodNONE,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+           init_nblist(log,&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
+                       maxsr,maxlr,GMX_NBKERNEL_VDW_NONE,eintmodNONE,ielecf,ielecmod,TRUE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
         }  
     }
     /* QMMM MM list */
     if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
     {
-       init_nblist(&fr->QMMMlist,NULL,
-                   maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
+       init_nblist(log,&fr->QMMMlist,NULL,
+                   maxsr,maxlr,0,0,ielec,ielecmod,FALSE,GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE);
+   }
+
+   if(log!=NULL)
+   {
+       fprintf(log,"\n");
     }
  
     fr->ns.nblist_initialized=TRUE;
@@ -386,27 +321,28 @@ static void reset_nblist(t_nblist *nl)
       }
  }
  
-static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
+static void reset_neighbor_lists(t_forcerec *fr,gmx_bool bResetSR, gmx_bool bResetLR)
  {
      int n,i;
    
-    if (bLR) 
+    if (fr->bQMMM)
      {
-        reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
+        /* only reset the short-range nblist */
+        reset_nblist(&(fr->QMMMlist));
      }
-    else 
+
+    for(n=0; n<fr->nnblists; n++)
      {
-        for(n=0; n<fr->nnblists; n++)
+        for(i=0; i<eNL_NR; i++)
          {
-            for(i=0; i<eNL_NR; i++)
+            if(bResetSR)
              {
-                reset_nblist(&(fr->nblists[n].nlist_sr[i]));
+                reset_nblist( &(fr->nblists[n].nlist_sr[i]) );
+            }
+            if(bResetLR)
+            {
+                reset_nblist( &(fr->nblists[n].nlist_lr[i]) );
              }
-        }
-        if (fr->bQMMM)
-        { 
-            /* only reset the short-range nblist */
-            reset_nblist(&(fr->QMMMlist));
          }
      }
  }
@@ -494,36 +430,26 @@ static inline void close_nblist(t_nblist *nlist)
      }
  }
  
-static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL, 
-                                       gmx_bool bMakeQMMMnblist)
+static inline void close_neighbor_lists(t_forcerec *fr,gmx_bool bMakeQMMMnblist)
  {
      int n,i;
      
-    if (bMakeQMMMnblist) {
-        if (!bLR)
-        {
+    if (bMakeQMMMnblist)
+    {
              close_nblist(&(fr->QMMMlist));
-        }
      }
-    else 
+
+    for(n=0; n<fr->nnblists; n++)
      {
-        if (bLR)
+        for(i=0; (i<eNL_NR); i++)
          {
-            close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
-        }
-        else
-        { 
-            for(n=0; n<fr->nnblists; n++)
-            {
-                for(i=0; (i<eNL_NR); i++)
-                {
-                    close_nblist(&(fr->nblists[n].nlist_sr[i]));
-                }
-            }
+            close_nblist(&(fr->nblists[n].nlist_sr[i]));
+            close_nblist(&(fr->nblists[n].nlist_lr[i]));
          }
      }
  }
  
+
  static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
  {
      int nrj=nlist->nrj;
@@ -532,8 +458,8 @@ static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
      {
          nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
          if (gmx_debug_at)
-            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
          
          srenew(nlist->jjnr,nlist->maxnrj);
      }
@@ -554,8 +480,8 @@ static inline void add_j_to_nblist_cg(t_nblist *nlist,
      {
          nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
          if (gmx_debug_at)
-            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
-                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
+            fprintf(debug,"Increasing %s nblist (ielec=%d,ivdw=%d,free=%d,igeometry=%d) j size to %d\n",
+                    bLR ? "LR" : "SR",nlist->ielec,nlist->ivdw,nlist->free_energy,nlist->igeometry,nlist->maxnrj);
          
          srenew(nlist->jjnr    ,nlist->maxnrj);
          srenew(nlist->jjnr_end,nlist->maxnrj);
@@ -599,9 +525,10 @@ put_in_list_t(gmx_bool              bHaveVdW[],
                t_excl            bExcl[],
                int               shift,
                t_forcerec *      fr,
-              gmx_bool              bLR,
-              gmx_bool              bDoVdW,
-              gmx_bool              bDoCoul);
+              gmx_bool          bLR,
+              gmx_bool          bDoVdW,
+              gmx_bool          bDoCoul,
+              int               solvent_opt);
  
  static void 
  put_in_list_at(gmx_bool              bHaveVdW[],
@@ -615,9 +542,10 @@ put_in_list_at(gmx_bool              bHaveVdW[],
                 t_excl            bExcl[],
                 int               shift,
                 t_forcerec *      fr,
-               gmx_bool              bLR,
-               gmx_bool              bDoVdW,
-               gmx_bool              bDoCoul)
+               gmx_bool          bLR,
+               gmx_bool          bDoVdW,
+               gmx_bool          bDoCoul,
+               int               solvent_opt)
  {
      /* The a[] index has been removed,
       * to put it back in i_atom should be a[i0] and jj should be a[jj].
@@ -658,7 +586,8 @@ put_in_list_at(gmx_bool              bHaveVdW[],
      
      /* Get the i charge group info */
      igid   = GET_CGINFO_GID(cginfo[icg]);
-    iwater = GET_CGINFO_SOLOPT(cginfo[icg]);
+
+    iwater = (solvent_opt!=esolNO) ? GET_CGINFO_SOLOPT(cginfo[icg]) : esolNO;
      
      bFreeEnergy = FALSE;
      if (md->nPerturbed) 
@@ -1152,9 +1081,10 @@ put_in_list_qmmm(gmx_bool              bHaveVdW[],
                   t_excl            bExcl[],
                   int               shift,
                   t_forcerec *      fr,
-                 gmx_bool              bLR,
-                 gmx_bool              bDoVdW,
-                 gmx_bool              bDoCoul)
+                 gmx_bool          bLR,
+                 gmx_bool          bDoVdW,
+                 gmx_bool          bDoCoul,
+                 int               solvent_opt)
  {
      t_nblist *   coul;
      int          i,j,jcg,igid,gid;
@@ -1214,9 +1144,10 @@ put_in_list_cg(gmx_bool              bHaveVdW[],
                 t_excl            bExcl[],
                 int               shift,
                 t_forcerec *      fr,
-               gmx_bool              bLR,
-               gmx_bool              bDoVdW,
-               gmx_bool              bDoCoul)
+               gmx_bool          bLR,
+               gmx_bool          bDoVdW,
+               gmx_bool          bDoCoul,
+               int               solvent_opt)
  {
      int          cginfo;
      int          igid,gid,nbl_ind;
@@ -1444,7 +1375,7 @@ static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
      if (nsbuf->nj + nrj > MAX_CG)
      {
          put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
-                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
+                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
          /* Reset buffer contents */
          nsbuf->ncg = nsbuf->nj = 0;
      }
@@ -1518,7 +1449,7 @@ static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
                  }
              }
          }
-    } 
+    }
      else
      {
          for(j=0; (j<njcg); j++)
@@ -1616,7 +1547,7 @@ static int ns_simple_core(t_forcerec *fr,
                  if (nsbuf->ncg > 0)
                  {
                      put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
-                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
+                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                      nsbuf->ncg=nsbuf->nj=0;
                  }
              }
@@ -1624,7 +1555,7 @@ static int ns_simple_core(t_forcerec *fr,
          /* setexcl(nri,i_atoms,excl,FALSE,bexcl); */
          setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
      }
-    close_neighbor_list(fr,FALSE,-1,-1,FALSE);
+    close_neighbor_lists(fr,FALSE);
      
      return nsearch;
  }
@@ -1785,56 +1716,13 @@ static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
   *
   ****************************************************/
  
-static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
-                         int ngid,t_mdatoms *md,int icg,
-                         int jgid,int nlr,
-                         atom_id lr[],t_excl bexcl[],int shift,
-                         rvec x[],rvec box_size,t_nrnb *nrnb,
-                         real *lambda,real *dvdlambda,
-                         gmx_grppairener_t *grppener,
-                         gmx_bool bDoVdW,gmx_bool bDoCoul,
-                         gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
-                         gmx_bool bHaveVdW[],
-                         gmx_bool bDoForces,rvec *f)
-{
-    int n,i;
-    t_nblist *nl;
-    
-    for(n=0; n<fr->nnblists; n++)
-    {
-        for(i=0; (i<eNL_NR); i++)
-        {
-            nl = &fr->nblists[n].nlist_lr[i];
-            if ((nl->nri > nl->maxnri-32) || bEvaluateNow)
-            {
-                close_neighbor_list(fr,TRUE,n,i,FALSE);
-                /* Evaluate the energies and forces */
-                do_nonbonded(cr,fr,x,f,md,NULL,
-                             grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
-                             grppener->ener[egCOULLR],
-                                                        grppener->ener[egGB],box_size,
-                             nrnb,lambda,dvdlambda,n,i,
-                             GMX_DONB_LR | GMX_DONB_FORCES);
-                
-                reset_neighbor_list(fr,TRUE,n,i);
-            }
-        }
-    }
-    
-    if (!bEvaluateNow)
-    {  
-        /* Put the long range particles in a list */
-        /* do_longrange is never called for QMMM  */
-        put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
-                    bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
-    }
-}
  
  static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
                          real *rvdw2,real *rcoul2,
                          real *rs2,real *rm2,real *rl2)
  {
      *rs2 = sqr(fr->rlist);
+
      if (bDoLongRange && fr->bTwinRange)
      {
          /* The VdW and elec. LR cut-off's could be different,
@@ -1883,26 +1771,16 @@ static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
      snew(ns->nlr_ljc,ngid);
      snew(ns->nlr_one,ngid);
      
-    if (rm2 > rs2)
-    {
-            /* Long range VdW and Coul buffers */
-        snew(ns->nl_lr_ljc,ngid);
-    }
-    if (rl2 > rm2)
-    {
-        /* Long range VdW or Coul only buffers */
-        snew(ns->nl_lr_one,ngid);
-    }
+    /* Always allocate both list types, since rcoulomb might now change with PME load balancing */
+    /* Long range VdW and Coul buffers */
+    snew(ns->nl_lr_ljc,ngid);
+    /* Long range VdW or Coul only buffers */
+    snew(ns->nl_lr_one,ngid);
+
      for(j=0; (j<ngid); j++) {
          snew(ns->nl_sr[j],MAX_CG);
-        if (rm2 > rs2)
-        {
-            snew(ns->nl_lr_ljc[j],MAX_CG);
-        }
-        if (rl2 > rm2)
-        {
-            snew(ns->nl_lr_one[j],MAX_CG);
-        }
+        snew(ns->nl_lr_ljc[j],MAX_CG);
+        snew(ns->nl_lr_one[j],MAX_CG);
      }
      if (debug)
      {
@@ -1922,8 +1800,7 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                         gmx_grppairener_t *grppener,
                         put_in_list_t *put_in_list,
                         gmx_bool bHaveVdW[],
-                       gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
-                       gmx_bool bMakeQMMMnblist)
+                       gmx_bool bDoLongRange,gmx_bool bMakeQMMMnblist)
  {
      gmx_ns_t *ns;
      atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
@@ -2273,10 +2150,11 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                                                          {
                                                              if (nsr[jgid] >= MAX_CG)
                                                              {
+                                                                /* Add to short-range list */
                                                                  put_in_list(bHaveVdW,ngid,md,icg,jgid,
                                                                              nsr[jgid],nl_sr[jgid],
                                                                              cgs->index,/* cgsatoms, */ bexcl,
-                                                                            shift,fr,FALSE,TRUE,TRUE);
+                                                                            shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                                                                  nsr[jgid]=0;
                                                              }
                                                              nl_sr[jgid][nsr[jgid]++]=jjcg;
@@ -2285,33 +2163,22 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                                                          {
                                                              if (nlr_ljc[jgid] >= MAX_CG)
                                                              {
-                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                             nlr_ljc[jgid],
-                                                                             nl_lr_ljc[jgid],bexcl,shift,x,
-                                                                             box_size,nrnb,
-                                                                             lambda,dvdlambda,
-                                                                             grppener,
-                                                                             TRUE,TRUE,FALSE,
-                                                                             put_in_list,
-                                                                             bHaveVdW,
-                                                                             bDoForces,f);
+                                                                /* Add to LJ+coulomb long-range list */
+                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
+                                                                            nlr_ljc[jgid],nl_lr_ljc[jgid],top->cgs.index,
+                                                                            bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
                                                                  nlr_ljc[jgid]=0;
                                                              }
                                                              nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
                                                          }
                                                          else
                                                          {
-                                                            if (nlr_one[jgid] >= MAX_CG) {
-                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
-                                                                             nlr_one[jgid],
-                                                                             nl_lr_one[jgid],bexcl,shift,x,
-                                                                             box_size,nrnb,
-                                                                             lambda,dvdlambda,
-                                                                             grppener,
-                                                                             rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                                                             put_in_list,
-                                                                             bHaveVdW,
-                                                                             bDoForces,f);
+                                                            if (nlr_one[jgid] >= MAX_CG)
+                                                            {
+                                                                /* Add to long-range list with only coul, or only LJ */
+                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
+                                                                            nlr_one[jgid],nl_lr_one[jgid],top->cgs.index,
+                                                                            bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
                                                                  nlr_one[jgid]=0;
                                                              }
                                                              nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
@@ -2333,24 +2200,21 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
                          {
                              put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
                                          cgs->index, /* cgsatoms, */ bexcl,
-                                        shift,fr,FALSE,TRUE,TRUE);
+                                        shift,fr,FALSE,TRUE,TRUE,fr->solvent_opt);
                          }
                          
                          if (nlr_ljc[nn] > 0)
                          {
-                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
-                                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                                         lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
-                                         put_in_list,bHaveVdW,bDoForces,f);
+                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_ljc[nn],
+                                        nl_lr_ljc[nn],top->cgs.index,
+                                        bexcl,shift,fr,TRUE,TRUE,TRUE,fr->solvent_opt);
                          }
                          
                          if (nlr_one[nn] > 0)
                          {
-                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
-                                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                                         lambda,dvdlambda,grppener,
-                                         rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
-                                         put_in_list,bHaveVdW,bDoForces,f);
+                            put_in_list(bHaveVdW,ngid,md,icg,nn,nlr_one[nn],
+                                        nl_lr_one[nn],top->cgs.index,
+                                        bexcl,shift,fr,TRUE,rvdw_lt_rcoul,rcoul_lt_rvdw,fr->solvent_opt);
                          }
                      }
                  }
@@ -2359,28 +2223,14 @@ static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
          /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
          setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
      }
-    /* Perform any left over force calculations */
-    for (nn=0; (nn<ngid); nn++)
-    {
-        if (rm2 > rs2)
-        {
-            do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
-                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
-                         lambda,dvdlambda,grppener,
-                         TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
-        }
-        if (rl2 > rm2) {
-            do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
-                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
-                         lambda,dvdlambda,grppener,
-                         rvdw_lt_rcoul,rcoul_lt_rvdw,
-                         TRUE,put_in_list,bHaveVdW,bDoForces,f);
-        }
-    }
+    /* No need to perform any left-over force calculations anymore (as we used to do here)
+     * since we now save the proper long-range lists for later evaluation.
+     */
+
      debug_gmx();
-    
-    /* Close off short range neighbourlists */
-    close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
+     
+    /* Close neighbourlists */
+    close_neighbor_lists(fr,bMakeQMMMnblist);
      
      return nns;
  }
@@ -2522,8 +2372,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                        real *lambda,real *dvdlambda,
                        gmx_grppairener_t *grppener,
                        gmx_bool bFillGrid,
-                      gmx_bool bDoLongRange,
-                      gmx_bool bDoForces,rvec *f)
+                      gmx_bool bDoLongRangeNS)
  {
      t_block  *cgs=&(top->cgs);
      rvec     box_size,grid_x0,grid_x1;
@@ -2538,7 +2387,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
      t_grid   *grid;
      gmx_domdec_zones_t *dd_zones;
      put_in_list_t *put_in_list;
-       
+
      ns = &fr->ns;
  
      /* Set some local variables */
@@ -2571,7 +2420,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
      debug_gmx();
      
      /* Reset the neighbourlists */
-    reset_neighbor_list(fr,FALSE,-1,-1);
+    reset_neighbor_lists(fr,TRUE,TRUE);
      
      if (bGrid && bFillGrid)
      {
@@ -2657,8 +2506,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                                grid,x,ns->bexcl,ns->bExcludeAlleg,
                                nrnb,md,lambda,dvdlambda,grppener,
                                put_in_list,ns->bHaveVdW,
-                              bDoLongRange,bDoForces,f,
-                              FALSE);
+                              bDoLongRangeNS,FALSE);
          
          /* neighbour searching withouth QMMM! QM atoms have zero charge in
           * the classical calculation. The charge-charge interaction
@@ -2675,8 +2523,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                                     grid,x,ns->bexcl,ns->bExcludeAlleg,
                                     nrnb,md,lambda,dvdlambda,grppener,
                                     put_in_list_qmmm,ns->bHaveVdW,
-                                   bDoLongRange,bDoForces,f,
-                                   TRUE);
+                                   bDoLongRangeNS,TRUE);
          }
      }
      else 
@@ -2686,7 +2533,7 @@ int search_neighbours(FILE *log,t_forcerec *fr,
                                   ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
      }
      debug_gmx();
-    
+
  #ifdef DEBUG
      pr_nsblock(log);
  #endif
diff --git a/src/mdlib/qmmm.c b/src/mdlib/qmmm.c

index 0a0c9030f2481f1e6e13033b7c9db0acb28cd045..9232f50e0334f3f5c60bd7b43f3a12bdea8bd414 100644 (file)
--- a/src/mdlib/qmmm.c
+++ b/src/mdlib/qmmm.c
@@ -568,13 +568,14 @@ void init_QMMMrec(t_commrec *cr,
        /* we now store the LJ C6 and C12 parameters in QM rec in case
         * we need to do an optimization 
         */
-      if(qr->qm[j]->bOPT || qr->qm[j]->bTS){
-       for(i=0;i<qm_nr;i++){
-         qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c6au;
-         qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                                 atom->type,atom->type)/c12au;
-       }
+      if(qr->qm[j]->bOPT || qr->qm[j]->bTS)
+      {
+          for(i=0;i<qm_nr;i++)
+          {
+              /* nbfp now includes the 6.0/12.0 derivative prefactors */
+              qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
+              qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
+          }
        }
        /* now we check for frontier QM atoms. These occur in pairs that
         * construct the vsite
@@ -629,18 +630,16 @@ void init_QMMMrec(t_commrec *cr,
      /* store QM atoms in the QMrec and initialise
       */
      init_QMrec(0,qr->qm[0],qm_nr,qm_arr,mtop,ir);
-    if(qr->qm[0]->bOPT || qr->qm[0]->bTS){
-      for(i=0;i<qm_nr;i++){
-       gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
-       qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c6au;
-       qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
-                               atom->type,atom->type)/c12au;
-      }
-      
+    if(qr->qm[0]->bOPT || qr->qm[0]->bTS)
+    {
+        for(i=0;i<qm_nr;i++)
+        {
+            gmx_mtop_atomnr_to_atom(alook,qm_arr[i],&atom);
+            /* nbfp now includes the 6.0/12.0 derivative prefactors */
+            qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c6au/6.0;
+            qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,atom->type,atom->type)/c12au/12.0;
+        }
      }
-    
-
  
      /* find frontier atoms and mark them true in the frontieratoms array.
       */
@@ -957,13 +956,11 @@ void update_QMMMrec(t_commrec *cr,
         */
        srenew(mm->c6,mm->nrMMatoms);
        srenew(mm->c12,mm->nrMMatoms);
-      for (i=0;i<mm->nrMMatoms;i++){
-       mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c6au;
-       mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
-                       md->typeA[mm->indexMM[i]],
-                       md->typeA[mm->indexMM[i]])/c12au;
+      for (i=0;i<mm->nrMMatoms;i++)
+      {
+          /* nbfp now includes the 6.0/12.0 derivative prefactors */
+          mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c6au/6.0;
+          mm->c12[i] =C12(fr->nbfp,top->idef.atnr,md->typeA[mm->indexMM[i]],md->typeA[mm->indexMM[i]])/c12au/12.0;
        }
        punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
      }
diff --git a/src/mdlib/sim_util.c b/src/mdlib/sim_util.c

index 889d1d48580b536b8124d8fee91036dfa29679aa..e1c0e5ccb25af9e36e8532c7211b486afe6acbca 100644 (file)
--- a/src/mdlib/sim_util.c
+++ b/src/mdlib/sim_util.c
@@ -760,7 +760,7 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
      bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
      bFillGrid     = (bNS && bStateChanged);
      bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
+    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DO_LR));
      bDoForces     = (flags & GMX_FORCE_FORCES);
      bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
      bUseGPU       = fr->nbv->bUseGPU;
@@ -1110,20 +1110,13 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
              }
          }
  
-        if (bSepLRF)
+        /* Clear the short- and long-range forces */
+        clear_rvecs(fr->natoms_force_constr,f);
+        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
          {
-            /* Add the long range forces to the short range forces */
-            for(i=0; i<fr->natoms_force_constr; i++)
-            {
-                copy_rvec(fr->f_twin[i],f[i]);
-            }
+            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
          }
-        else if (!(fr->bTwinRange && bNS))
-        {
-            /* Clear the short-range forces */
-            clear_rvecs(fr->natoms_force_constr,f);
-        }
-
+        
          clear_rvec(fr->vir_diag_posres);
  
          GMX_BARRIER(cr->mpi_comm_mygroup);
@@ -1149,13 +1142,25 @@ void do_force_cutsVERLET(FILE *fplog,t_commrec *cr,
      /* if we use the GPU turn off the nonbonded */
      do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
                        cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-                      x,hist,f,enerd,fcd,mtop,top,fr->born,
+                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
                        &(top->atomtypes),bBornRadii,box,
                        inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
                        ((nb_kernel_type == nbk8x8x8_CUDA || nb_kernel_type == nbk8x8x8_PlainC) 
                          ? flags&~GMX_FORCE_NONBONDED : flags),
                        &cycles_pme);
  
+    if(bSepLRF)
+    {
+        if (do_per_step(step,inputrec->nstcalclr))
+        {
+            /* Add the long range forces to the short range forces */
+            for(i=0; i<fr->natoms_force_constr; i++)
+            {
+                rvec_add(fr->f_twin[i],f[i],f[i]);
+            }
+        }
+    }
+    
      if (!bUseOrEmulGPU)
      {
          /* Maybe we should move this into do_force_lowlevel */
@@ -1410,7 +1415,7 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
      int    start,homenr;
      double mu[2*DIM];
      gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
-    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
+    gmx_bool   bDoLongRangeNS,bDoForces,bDoPotential,bSepLRF;
      gmx_bool   bDoAdressWF;
      matrix boxs;
      rvec   vzero,box_diag;
@@ -1446,13 +1451,18 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
          }
      }
  
-    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
-    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
-    bFillGrid     = (bNS && bStateChanged);
-    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
-    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
-    bDoForces     = (flags & GMX_FORCE_FORCES);
-    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
+    bStateChanged  = (flags & GMX_FORCE_STATECHANGED);
+    bNS            = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
+    /* Should we update the long-range neighborlists at this step? */
+    bDoLongRangeNS = fr->bTwinRange && bNS;
+    /* Should we perform the long-range nonbonded evaluation inside the neighborsearching? */
+    bFillGrid      = (bNS && bStateChanged);
+    bCalcCGCM      = (bFillGrid && !DOMAINDECOMP(cr));
+    bDoForces      = (flags & GMX_FORCE_FORCES);
+    bDoPotential   = (flags & GMX_FORCE_ENERGY);
+    bSepLRF        = ((inputrec->nstcalclr>1) && bDoForces &&
+                      (flags & GMX_FORCE_SEPLRF) && (flags & GMX_FORCE_DO_LR));
+
      /* should probably move this to the forcerec since it doesn't change */
      bDoAdressWF   = ((fr->adress_type!=eAdressOff));
  
@@ -1613,13 +1623,6 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
              mk_mshift(fplog,graph,fr->ePBC,box,x);
          }
  
-        /* Reset long range forces if necessary */
-        if (fr->bTwinRange)
-        {
-            /* Reset the (long-range) forces if necessary */
-            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
-        }
-
          /* Do the actual neighbour searching and if twin range electrostatics
           * also do the calculation of long range forces and energies.
           */
@@ -1627,7 +1630,7 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
          ns(fplog,fr,x,box,
             groups,&(inputrec->opts),top,mdatoms,
             cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
-           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
+           bDoLongRangeNS);
          if (bSepDVDL)
          {
              fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
@@ -1700,20 +1703,13 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
              }
          }
  
-        if (bSepLRF)
-        {
-            /* Add the long range forces to the short range forces */
-            for(i=0; i<fr->natoms_force_constr; i++)
-            {
-                copy_rvec(fr->f_twin[i],f[i]);
-            }
-        }
-        else if (!(fr->bTwinRange && bNS))
+        /* Clear the short- and long-range forces */
+        clear_rvecs(fr->natoms_force_constr,f);
+        if(bSepLRF && do_per_step(step,inputrec->nstcalclr))
          {
-            /* Clear the short-range forces */
-            clear_rvecs(fr->natoms_force_constr,f);
+            clear_rvecs(fr->natoms_force_constr,fr->f_twin);
          }
-
+        
          clear_rvec(fr->vir_diag_posres);
  
          GMX_BARRIER(cr->mpi_comm_mygroup);
@@ -1738,13 +1734,25 @@ void do_force_cutsGROUP(FILE *fplog,t_commrec *cr,
      /* Compute the bonded and non-bonded energies and optionally forces */
      do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
                        cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
-                      x,hist,f,enerd,fcd,mtop,top,fr->born,
+                      x,hist,f, bSepLRF ? fr->f_twin : f,enerd,fcd,mtop,top,fr->born,
                        &(top->atomtypes),bBornRadii,box,
                        inputrec->fepvals,lambda,
                        graph,&(top->excls),fr->mu_tot,
                        flags,
                        &cycles_pme);
  
+    if(bSepLRF)
+    {
+        if (do_per_step(step,inputrec->nstcalclr))
+        {
+            /* Add the long range forces to the short range forces */
+            for(i=0; i<fr->natoms_force_constr; i++)
+            {
+                rvec_add(fr->f_twin[i],f[i],f[i]);
+            }
+        }
+    }
+    
      cycles_force = wallcycle_stop(wcycle,ewcFORCE);
      GMX_BARRIER(cr->mpi_comm_mygroup);
  
@@ -2039,7 +2047,7 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
    double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
    double invscale,invscale2,invscale3;
    int    ri0,ri1,ri,i,offstart,offset;
-  real   scale,*vdwtab;
+  real   scale,*vdwtab,tabfactor,tmp;
  
    fr->enershiftsix = 0;
    fr->enershifttwelve = 0;
@@ -2059,8 +2067,8 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
                   "With dispersion correction rvdw-switch can not be zero "
                   "for vdw-type = %s",evdw_names[fr->vdwtype]);
  
-      scale  = fr->nblists[0].tab.scale;
-      vdwtab = fr->nblists[0].vdwtab;
+      scale  = fr->nblists[0].table_elec_vdw.scale;
+      vdwtab = fr->nblists[0].table_vdw.data;
  
        /* Round the cut-offs to exact table values for precision */
        ri0 = floor(fr->rvdw_switch*scale);
@@ -2070,10 +2078,14 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
        rc3 = r0*r0*r0;
        rc9  = rc3*rc3*rc3;
  
-      if (fr->vdwtype == evdwSHIFT) {
-       /* Determine the constant energy shift below rvdw_switch */
-       fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
-       fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
+      if (fr->vdwtype == evdwSHIFT)
+      {
+          /* Determine the constant energy shift below rvdw_switch.
+           * Table has a scale factor since we have scaled it down to compensate
+           * for scaling-up c6/c12 with the derivative factors to save flops in analytical kernels.
+           */
+          fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - 6.0*vdwtab[8*ri0];
+          fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - 12.0*vdwtab[8*ri0 + 4];
        }
        /* Add the constant part from 0 to rvdw_switch.
         * This integration from 0 to rvdw_switch overcounts the number
@@ -2100,9 +2112,19 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
        for (i=0;i<2;i++) {
          enersum = 0.0; virsum = 0.0;
          if (i==0)
-         offstart = 0;
-       else
-         offstart = 4;
+        {
+            offstart = 0;
+            /* Since the dispersion table has been scaled down a factor 6.0 and the repulsion
+             * a factor 12.0 to compensate for the c6/c12 parameters inside nbfp[] being scaled
+             * up (to save flops in kernels), we need to correct for this.
+             */
+            tabfactor = 6.0;
+        }
+        else
+        {
+            offstart = 4;
+            tabfactor = 12.0;
+        }
         for (ri=ri0; ri<ri1; ri++) {
            r = ri*invscale;
            ea = invscale3;
@@ -2114,22 +2136,19 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
            pc = 3.0*invscale*r*r;
            pd = r*r*r;
  
-          /* this "8" is from the packing in the vdwtab array - perhaps
-           should be #define'ed? */
+          /* this "8" is from the packing in the vdwtab array - perhaps should be #define'ed? */
            offset = 8*ri + offstart;
            y0 = vdwtab[offset];
-          f = vdwtab[offset+1];
-          g = vdwtab[offset+2];
-          h = vdwtab[offset+3];
-
-          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
-            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
-          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) +
-            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
+          f  = vdwtab[offset+1];
+          g  = vdwtab[offset+2];
+          h  = vdwtab[offset+3];
  
+          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2) + g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
+          virsum  += f*(pa/4 + pb/3 + pc/2 + pd) + 2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
          }
-        enersum *= 4.0*M_PI;
-        virsum  *= 4.0*M_PI;
+          
+        enersum *= 4.0*M_PI*tabfactor;
+        virsum  *= 4.0*M_PI*tabfactor;
          eners[i] -= enersum;
          virs[i]  -= virsum;
        }
@@ -2149,7 +2168,7 @@ void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
        /* Contribution beyond the cut-off */
        eners[0] += -4.0*M_PI/(3.0*rc3);
        eners[1] +=  4.0*M_PI/(9.0*rc9);
-      if (fr->vdw_pot_shift) {
+      if (fr->vdw_modifier==eintmodPOTSHIFT) {
            /* Contribution within the cut-off */
            eners[0] += -4.0*M_PI/(3.0*rc3);
            eners[1] +=  4.0*M_PI/(3.0*rc9);
diff --git a/src/mdlib/tables.c b/src/mdlib/tables.c

index 0c26ea947a810e4fe2d1e475b7b1970c8fd45175..8b2228919c26dec9ac3cc314e661f0112e23d89f 100644 (file)
--- a/src/mdlib/tables.c
+++ b/src/mdlib/tables.c
@@ -139,12 +139,14 @@ static double v_ewald_lr(double beta,double r)
      }
  }
  
-void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
-                                 int ntab,int tableformat,
-                                 real dx,real beta)
+void table_spline3_fill_ewald_lr(real *table_f,
+                                 real *table_v,
+                                 real *table_fdv0,
+                                 int   ntab,
+                                 real  dx,
+                                 real  beta)
  {
      real tab_max;
-    int stride=0;
      int i,i_inrange;
      double dc,dc_new;
      gmx_bool bOutOfRange;
@@ -168,13 +170,6 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
       * The rms force error is the max error times 1/sqrt(5)=0.45.
       */
  
-    switch (tableformat)
-    {
-    case tableformatF:    stride = 1; break;
-    case tableformatFDV0: stride = 4; break;
-    default: gmx_incons("Unknown table format");
-    }
-
      bOutOfRange = FALSE;
      i_inrange = ntab;
      v_inrange = 0;
@@ -198,20 +193,9 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
              vi = v_inrange - dc*(i - i_inrange)*dx;
          }
  
-        switch (tableformat)
+        if(table_v!=NULL)
          {
-        case tableformatF:
-            if (tabv != NULL)
-            {
-                tabv[i] = vi;
-            }
-            break;
-        case tableformatFDV0:
-            tabf[i*stride+2] = vi;
-            tabf[i*stride+3] = 0;
-            break;
-        default:
-            gmx_incons("Unknown table format");
+            table_v[i] = vi;
          }
  
          if (i == 0)
@@ -243,12 +227,12 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
          if (i == ntab - 1)
          {
              /* Fill the table with the force, minus the derivative of the spline */
-            tabf[i*stride] = -dc;
+            table_f[i] = -dc;
          }
          else
          {
              /* tab[i] will contain the average of the splines over the two intervals */
-            tabf[i*stride] += -0.5*dc;
+            table_f[i] += -0.5*dc;
          }
  
          if (!bOutOfRange)
@@ -274,19 +258,27 @@ void table_spline3_fill_ewald_lr(real *tabf,real *tabv,
              }
          }
  
-        tabf[(i-1)*stride] = -0.5*dc;
+        table_f[(i-1)] = -0.5*dc;
      }
      /* Currently the last value only contains half the force: double it */
-    tabf[0] *= 2;
+    table_f[0] *= 2;
  
-    if (tableformat == tableformatFDV0)
+    if(table_v!=NULL && table_fdv0!=NULL)
      {
-        /* Store the force difference in the second entry */
-        for(i=0; i<ntab-1; i++)
+        /* Copy to FDV0 table too. Allocation occurs in forcerec.c,
+         * init_ewald_f_table().
+         */
+        for(i=0;i<ntab-1;i++)
          {
-            tabf[i*stride+1] = tabf[(i+1)*stride] - tabf[i*stride];
+            table_fdv0[4*i]     = table_f[i];
+            table_fdv0[4*i+1]   = table_f[i+1]-table_f[i];
+            table_fdv0[4*i+2]   = table_v[i];
+            table_fdv0[4*i+3]   = 0.0;
          }
-        tabf[(ntab-1)*stride+1] = -tabf[i*stride];
+        table_fdv0[4*(ntab-1)]    = table_f[(ntab-1)];
+        table_fdv0[4*(ntab-1)+1]  = -table_f[(ntab-1)];
+        table_fdv0[4*(ntab-1)+2]  = table_v[(ntab-1)];
+        table_fdv0[4*(ntab-1)+3]  = 0.0;
      }
  }
  
@@ -344,7 +336,7 @@ static void evaluate_table(real VFtab[], int offset, int stride,
  }
  
  static void copy2table(int n,int offset,int stride,
-                      double x[],double Vtab[],double Ftab[],
+                      double x[],double Vtab[],double Ftab[],real scalefactor,
                        real dest[])
  {
  /* Use double prec. for the intermediary variables
@@ -370,10 +362,10 @@ static void copy2table(int n,int offset,int stride,
        H   = 0;
      }
      nn0 = offset + i*stride;
-    dest[nn0]   = Vtab[i];
-    dest[nn0+1] = F;
-    dest[nn0+2] = G;
-    dest[nn0+3] = H;
+    dest[nn0]   = scalefactor*Vtab[i];
+    dest[nn0+1] = scalefactor*F;
+    dest[nn0+2] = scalefactor*G;
+    dest[nn0+3] = scalefactor*H;
    }
  }
  
@@ -761,30 +753,31 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
  
      switch (tp) {
      case etabLJ6:
-      /* Dispersion */
-      Vtab  = -r6;
-      Ftab  = 6.0*Vtab/r;
-      break;
+            /* Dispersion */
+            Vtab = -r6;
+            Ftab = 6.0*Vtab/r;
+            break;
      case etabLJ6Switch:
      case etabLJ6Shift:
        /* Dispersion */
        if (r < rc) {      
-       Vtab  = -r6;
-       Ftab  = 6.0*Vtab/r;
+          Vtab = -r6;
+          Ftab = 6.0*Vtab/r;
+          break;
        }
        break;
      case etabLJ12:
-      /* Repulsion */
-      Vtab  = r12;
-      Ftab  = reppow*Vtab/r;
+            /* Repulsion */
+            Vtab  = r12;
+            Ftab  = reppow*Vtab/r;
        break;
      case etabLJ12Switch:
      case etabLJ12Shift:
        /* Repulsion */
        if (r < rc) {                
-       Vtab  = r12;
-       Ftab  = reppow*Vtab/r;
-      }  
+          Vtab  = r12;
+          Ftab  = reppow*Vtab/r;
+      }
        break;
         case etabLJ6Encad:
          if(r < rc) {
@@ -797,9 +790,9 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
          break;
      case etabLJ12Encad:
          if(r < rc) {
-            Vtab  = r12-12.0*(rc-r)*rc6*rc6/rc-1.0*rc6*rc6;
-            Ftab  = 12.0*r12/r-12.0*rc6*rc6/rc;
-        } else { /* r>rc */ 
+            Vtab  = -(r6-6.0*(rc-r)*rc6/rc-rc6);
+            Ftab  = -(6.0*r6/r-6.0*rc6/rc);
+        } else { /* r>rc */
              Vtab  = 0;
              Ftab  = 0;
          } 
@@ -875,8 +868,8 @@ static void fill_table(t_tabledata *td,int tp,const t_forcerec *fr)
      if ((r > r1) && bSwitch) {
        Ftab = Ftab*swi - Vtab*swi1;
        Vtab = Vtab*swi;
-    }  
-    
+    }
+
      /* Convert to single precision when we store to mem */
      td->v[i]  = Vtab;
      td->f[i]  = Ftab;
@@ -1019,7 +1012,8 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
    gmx_bool        b14only,bReadTab,bGenTab;
    real        x0,y0,yp;
    int         i,j,k,nx,nx0,tabsel[etiNR];
-  
+  real        scalefactor;
+
    t_forcetable table;
  
    b14only = (flags & GMX_MAKETABLES_14ONLY);
@@ -1039,6 +1033,12 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
    nx0             = 10;
    nx              = 0;
    
+  table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+  table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+  table.formatsize    = 4;
+  table.ninteractions = 3;
+  table.stride        = table.formatsize*table.ninteractions;
+
    /* Check whether we have to read or generate */
    bReadTab = FALSE;
    bGenTab  = FALSE;
@@ -1084,7 +1084,7 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
     * numbers per nx+1 data points. For performance reasons we want
     * the table data to be aligned to 16-byte.
     */
-  snew_aligned(table.tab, 12*(nx+1)*sizeof(real),16);
+  snew_aligned(table.data, 12*(nx+1)*sizeof(real),16);
  
    for(k=0; (k<etiNR); k++) {
      if (tabsel[k] != etabUSER) {
@@ -1099,7 +1099,27 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
                 td[k].nx,b14only?"1-4 ":"",tprops[tabsel[k]].name,
                 td[k].tabscale);
      }
-    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,table.tab);
+
+    /* Set scalefactor for c6/c12 tables. This is because we save flops in the non-table kernels
+     * by including the derivative constants (6.0 or 12.0) in the parameters, since
+     * we no longer calculate force in most steps. This means the c6/c12 parameters
+     * have been scaled up, so we need to scale down the table interactions too.
+     * It comes here since we need to scale user tables too.
+     */
+      if(k==etiLJ6)
+      {
+          scalefactor = 1.0/6.0;
+      }
+      else if(k==etiLJ12 && tabsel[k]!=etabEXPMIN)
+      {
+          scalefactor = 1.0/12.0;
+      }
+      else
+      {
+          scalefactor = 1.0;
+      }
+
+    copy2table(table.n,k*4,12,td[k].x,td[k].v,td[k].f,scalefactor,table.data);
      
      if (bDebugMode() && bVerbose) {
        if (b14only)
@@ -1109,7 +1129,7 @@ t_forcetable make_tables(FILE *out,const output_env_t oenv,
        /* plot the output 5 times denser than the table data */
        for(i=5*((nx0+1)/2); i<5*table.n; i++) {
         x0 = i*table.r/(5*(table.n-1));
-       evaluate_table(table.tab,4*k,12,table.scale,x0,&y0,&yp);
+       evaluate_table(table.data,4*k,12,table.scale,x0,&y0,&yp);
         fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
        }
        gmx_fio_fclose(fp);
@@ -1154,12 +1174,17 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
          * use etiNR (since we only have one table, but ...) 
          */
         snew(td,1);
-       table.r         = fr->gbtabr;
-       table.scale     = fr->gbtabscale;
-       table.scale_exp = 0;
-       table.n         = table.scale*table.r;
-       nx0             = 0;
-       nx              = table.scale*table.r;
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+       table.r             = fr->gbtabr;
+       table.scale         = fr->gbtabscale;
+       table.scale_exp     = 0;
+       table.n             = table.scale*table.r;
+    table.formatsize    = 4;
+    table.ninteractions = 1;
+    table.stride        = table.formatsize*table.ninteractions;
+       nx0                 = 0;
+       nx                  = table.scale*table.r;
         
         /* Check whether we have to read or generate 
          * We will always generate a table, so remove the read code
@@ -1177,7 +1202,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
          * to do this :-)
          */
         
-       snew_aligned(table.tab,4*nx,16);
+       snew_aligned(table.data,4*nx,16);
         
         init_table(out,nx,nx0,table.scale,&(td[0]),!bReadTab);
         
@@ -1206,7 +1231,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
                 
      }
         
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
         
         if(bDebugMode())
      {
@@ -1217,7 +1242,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
                 {
                         /* x0=i*table.r/(5*table.n); */
                         x0=i*table.r/table.n;
-                       evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+                       evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
                         fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
                         
                 }
@@ -1235,7 +1260,7 @@ t_forcetable make_gb_table(FILE *out,const output_env_t oenv,
          Ftab = (r-0.25*r*expterm)/((r2+expterm)*sqrt(r2+expterm));
          
          
-        evaluate_table(table.tab,0,4,table.scale,r,&y0,&yp);
+        evaluate_table(table.data,0,4,table.scale,r,&y0,&yp);
          printf("gb: i=%d, x0=%g, y0=%15.15f, Vtab=%15.15f, yp=%15.15f, Ftab=%15.15f\n",i,r, y0, Vtab, yp, Ftab);
          
          abs_error_r=fabs(y0-Vtab);
@@ -1337,9 +1362,9 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
          * to do this :-)
          */
         
-    snew_aligned(table.tab,4*nx,16);
-       
-       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,table.tab);
+    snew_aligned(table.data,4*nx,16);
+
+       copy2table(table.n,0,4,td[0].x,td[0].v,td[0].f,1.0,table.data);
         
         if(bDebugMode())
           {
@@ -1351,7 +1376,7 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
               {
                 /* x0=i*table.r/(5*table.n); */
                 x0 = i*table.r/(5*(table.n-1));
-               evaluate_table(table.tab,0,4,table.scale,x0,&y0,&yp);
+               evaluate_table(table.data,0,4,table.scale,x0,&y0,&yp);
                 fprintf(fp,"%15.10e  %15.10e  %15.10e\n",x0,y0,yp);
                 
               }
@@ -1360,6 +1385,13 @@ t_forcetable make_atf_table(FILE *out,const output_env_t oenv,
  
         done_tabledata(&(td[0]));
         sfree(td);
+
+    table.interaction   = GMX_TABLE_INTERACTION_ELEC_VDWREP_VDWDISP;
+    table.format        = GMX_TABLE_FORMAT_CUBICSPLINE_YFGH;
+    table.formatsize    = 4;
+    table.ninteractions = 3;
+    table.stride        = table.formatsize*table.ninteractions;
+
         
         return table;
  }
@@ -1386,8 +1418,8 @@ bondedtable_t make_bonded_table(FILE *fplog,char *fn,int angle)
    }
    tab.n = td.nx;
    tab.scale = td.tabscale;
-  snew(tab.tab,tab.n*4);
-  copy2table(tab.n,0,4,td.x,td.v,td.f,tab.tab);
+  snew(tab.data,tab.n*4);
+  copy2table(tab.n,0,4,td.x,td.v,td.f,1.0,tab.data);
    done_tabledata(&td);
  
    return tab;
diff --git a/src/mdlib/tpi.c b/src/mdlib/tpi.c

index a5a2a71893bba4bf96061f4ee1e420b4b73888d2..9a08e4ba35ab727daf0fa8aaea5e457f5d8075a3 100644 (file)
--- a/src/mdlib/tpi.c
+++ b/src/mdlib/tpi.c
@@ -568,7 +568,7 @@ double do_tpi(FILE *fplog,t_commrec *cr,
                           state->lambda,
                           NULL,fr,NULL,mu_tot,t,NULL,NULL,FALSE,
                           GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY |
-                         (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DOLR : 0) |
+                         (bNS ? GMX_FORCE_DYNAMICBOX | GMX_FORCE_NS | GMX_FORCE_DO_LR : 0) |
                           (bStateChanged ? GMX_FORCE_STATECHANGED : 0)); 
                  cr->nnodes = nnodes;
                  bStateChanged = FALSE;
diff --git a/src/mdlib/update.c b/src/mdlib/update.c

index 430bec3c523d0fdb609fb47737030daa506fac5b..932fc34f225b47acba1c4b643a6cbb580bda7676 100644 (file)
--- a/src/mdlib/update.c
+++ b/src/mdlib/update.c
@@ -1205,7 +1205,7 @@ static void deform(gmx_update_t upd,
      }
  }
  
-static void combine_forces(int nstlist,
+static void combine_forces(int nstcalclr,
                             gmx_constr_t constr,
                             t_inputrec *ir,t_mdatoms *md,t_idef *idef,
                             t_commrec *cr,
@@ -1235,10 +1235,10 @@ static void combine_forces(int nstlist,
                    NULL,NULL,nrnb,econqForce,ir->epc==epcMTTK,state->veta,state->veta);
      }
  
-    /* Add nstlist-1 times the LR force to the sum of both forces
+    /* Add nstcalclr-1 times the LR force to the sum of both forces
       * and store the result in forces_lr.
       */
-    nm1 = nstlist - 1;
+    nm1 = nstcalclr - 1;
      for(i=start; i<nrend; i++)
      {
          for(d=0; d<DIM; d++)
@@ -1709,7 +1709,7 @@ void update_coords(FILE         *fplog,
      tensor           vir_con;
      rvec             *vcom,*xcom,*vall,*xall,*xin,*vin,*forcein,*fall,*xpall,*xprimein,*xprime;
      int              nth,th;
-
+    
      /* Running the velocity half does nothing except for velocity verlet */
      if ((UpdatePart == etrtVELOCITY1 || UpdatePart == etrtVELOCITY2) &&
          !EI_VV(inputrec->eI))
@@ -1740,14 +1740,14 @@ void update_coords(FILE         *fplog,
      bNH = inputrec->etc == etcNOSEHOOVER;
      bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
  
-    if (bDoLR && inputrec->nstlist > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
+    if (bDoLR && inputrec->nstcalclr > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
      {
-        /* Store the total force + nstlist-1 times the LR force
+        /* Store the total force + nstcalclr-1 times the LR force
           * in forces_lr, so it can be used in a normal update algorithm
           * to produce twin time stepping.
           */
          /* is this correct in the new construction? MRS */
-        combine_forces(inputrec->nstlist,constr,inputrec,md,idef,cr,
+        combine_forces(inputrec->nstcalclr,constr,inputrec,md,idef,cr,
                         step,state,bMolPBC,
                         start,nrend,f,f_lr,nrnb);
          force = f_lr;
diff --git a/src/mdlib/wall.c b/src/mdlib/wall.c

index d1be14c8d246aaf77ddedf91a3cb78d2a5c83c4a..dd69de4b7806a72bc79503e6f679272708639ef2 100644 (file)
--- a/src/mdlib/wall.c
+++ b/src/mdlib/wall.c
@@ -83,7 +83,7 @@ void make_wall_tables(FILE *fplog,const output_env_t oenv,
         /* Since wall have no charge, we can compress the table */
         for(i=0; i<=tab->n; i++)
           for(j=0; j<8; j++)
-           tab->tab[8*i+j] = tab->tab[12*i+4+j];
+           tab->data[8*i+j] = tab->data[12*i+4+j];
        }
      }
    }
@@ -167,8 +167,9 @@ real do_walls(t_inputrec *ir,t_forcerec *fr,matrix box,t_mdatoms *md,
                  /* The wall energy groups are always at the end of the list */
                  ggid = gid[i]*ngid + ngid - nwall + w;
                  at = type[i];
-                Cd = nbfp[ntw[w]+2*at];
-                Cr = nbfp[ntw[w]+2*at+1];
+                /* nbfp now includes the 6.0/12.0 derivative prefactors */
+                Cd = nbfp[ntw[w]+2*at]/6.0;
+                Cr = nbfp[ntw[w]+2*at+1]/12.0;
                  if (!((Cd==0 && Cr==0) || (egp_flags[ggid] & EGP_EXCL)))
                  {
                      if (w == 0)
@@ -197,7 +198,7 @@ real do_walls(t_inputrec *ir,t_forcerec *fr,matrix box,t_mdatoms *md,
                          }
                          tab = &(fr->wall_tab[w][gid[i]]);
                          tabscale = tab->scale;
-                        VFtab    = tab->tab;
+                        VFtab    = tab->data;
                          
                          rt    = r*tabscale;
                          n0    = rt;
diff --git a/src/mdlib/wnblist.c b/src/mdlib/wnblist.c

index 16a6b4c16eba2de3f3e355f408cf0efc0eec184f..a1a4932d25f136349c2f33f856d975d41c29de6e 100644 (file)
--- a/src/mdlib/wnblist.c
+++ b/src/mdlib/wnblist.c
@@ -60,9 +60,9 @@ static void write_nblist(FILE *out,gmx_domdec_t *dd,t_nblist *nblist,int nDNL)
    gmx_domdec_zones_t *dd_zones;
  
    if (nblist->nri > 0) {  
-    fprintf(out,"il_name: %s  Solvent opt: %s\n",
-            nrnb_str(nblist->il_code),
-            enlist_names[nblist->enlist]);
+    fprintf(out,"ielec: %d, ivdw: %d, free_energy: %d, Solvent opt: %s\n",
+            nblist->ielec,nblist->ivdw,nblist->free_energy,
+            gmx_nblist_geometry_names[nblist->igeometry]);
      fprintf(out,"nri: %d  npair: %d\n",nblist->nri,nblist->nrj);
      if (dd) {
        dd_zones = domdec_zones(dd);
@@ -94,7 +94,7 @@ static void write_nblist(FILE *out,gmx_domdec_t *dd,t_nblist *nblist,int nDNL)
      if (nDNL >= 2) {
        for(i=0; i<nblist->nri; i++) {
         nii = 1;
-       if (nDNL >= 3 && nblist->enlist != enlistATOM_ATOM)
+       if (nDNL >= 3 && nblist->igeometry != GMX_NBLIST_GEOMETRY_PARTICLE_PARTICLE)
           nii = 3;
         nj = nblist->jindex[i+1] - nblist->jindex[i];
         fprintf(out,"i: %d shift: %d gid: %d nj: %d\n",
@@ -130,86 +130,7 @@ static void set_mat(FILE *fp,int **mat,int i0,int ni,int j0,int nj,
    }
  }
  
-int read_nblist(FILE *in,FILE *fp,int **mat,int natoms,gmx_bool bSymm)
-{
-    gmx_bool bNL;
-    char buf[256],b1[32],b2[32],solv[256],il_code[256];
-    int  i,ii,j,nnbl,full,icmp,nri,isolv;
-    int  iatom,nrj,nj,shift,gid,nargs,njtot=0;
-    
-    do {
-        if (fgets2(buf,255,in) == NULL)
-            gmx_fatal(FARGS,"EOF when looking for '%s' in logfile",header);
-    } while (strstr(buf,header) == NULL);
-    
-    do {
-      do {
-        if (fgets2(buf,255,in) == NULL)
-         return njtot;
-      } while (strstr(buf,"nri:") == NULL);
-      
-      if (0) {
-       if ((nargs = sscanf(buf,"%*s%s%*s%s",il_code,solv)) != 2) {
-         fprintf(stderr,"Can not find the right il_code\n");
-         return njtot;
-       }
-        for(isolv=0; (isolv<esolNR); isolv++)
-         if (strstr(esol_names[isolv],solv) != NULL)
-           break;
-       
-        if (isolv == esolNR) {
-         fprintf(stderr,"Can not read il_code or solv (nargs=%d)\n",nargs);
-         return njtot;
-        }
-      }
-      else
-       isolv = enlistATOM_ATOM;
-      
-      /* gmx_fatal(FARGS,"Can not read il_code or solv (nargs=%d)",nargs);*/
-      if ((nargs = sscanf(buf,"%*s%d%*s%d",&nri,&nrj)) != 2)
-       gmx_fatal(FARGS,"Can not read nri or nrj (nargs=%d)",nargs);
-      for(ii=0; (ii<nri); ii++) {
-       if ((nargs = fscanf(in,"%*s%d%*s%d%*s%d%*s%d",
-                           &iatom,&shift,&gid,&nj)) != 4)
-         gmx_fatal(FARGS,"Can not read iatom, shift gid or nj (nargs=%d)",nargs);
-       /* Number shifts from 1 to 27 iso 0 to 26 to distinguish uninitialized 
-        * matrix elements.
-        */
-       range_check(iatom,0,natoms);
-       for(i=0; (i<nj); i++) {
-         if ((nargs = fscanf(in,"%*s%d",&j)) != 1)
-           gmx_fatal(FARGS,"Can not read j");
-         range_check(j,0,natoms);
-         switch (isolv) {
-         case enlistATOM_ATOM:
-           set_mat(fp,mat,iatom,1,j,1,bSymm,shift);
-           njtot++;
-           break;
-         case enlistSPC_ATOM:
-           set_mat(fp,mat,iatom,3,j,1,bSymm,shift);
-           njtot+=3;
-           break;
-         case enlistSPC_SPC:
-           set_mat(fp,mat,iatom,3,j,3,bSymm,shift);
-           njtot+=9;
-           break;
-         case enlistTIP4P_ATOM:
-           set_mat(fp,mat,iatom,4,j,1,bSymm,shift);
-           njtot+=4;
-           break;
-         case enlistTIP4P_TIP4P:
-           set_mat(fp,mat,iatom,4,j,4,bSymm,shift);
-           njtot+=16;
-           break;
-         default:
-           gmx_incons("non-existing solvent type");
-         }
-       }
-      }
-      fprintf(fp,"nri = %d  nrj = %d\n",nri,nrj);
-    } while (TRUE);
-    return -1;
-}
+
  
  void dump_nblist(FILE *out,t_commrec *cr,t_forcerec *fr,int nDNL)
  {
diff --git a/src/tools/addconf.c b/src/tools/addconf.c

index a474002228e533172b9b74c13570949b6ab8fe4b..0c4337e169d6ab4a5862897f5be7697d2bfd2048 100644 (file)
--- a/src/tools/addconf.c
+++ b/src/tools/addconf.c
@@ -283,8 +283,7 @@ void do_nsgrid(FILE *fp,gmx_bool bVerbose,
    snew(dvdl,efptNR);
    init_neighbor_list(fp,fr,md->homenr);
    search_neighbours(fp,fr,x,box,top,
-                   &mtop->groups,cr,&nrnb,md,lambda,dvdl,NULL,
-                   TRUE,FALSE,FALSE,NULL);
+                   &mtop->groups,cr,&nrnb,md,lambda,dvdl,NULL,TRUE,FALSE);
  
    if (debug)
      dump_nblist(debug,cr,fr,0);
diff --git a/src/tools/calcpot.c b/src/tools/calcpot.c

index ff0a5bec4a2ea478b5b8643d7ab4cb9507b42302..96e359fd9d32e07c584f2ee74475ee62e3d76d6f 100644 (file)
--- a/src/tools/calcpot.c
+++ b/src/tools/calcpot.c
@@ -158,7 +158,7 @@ static void low_calc_pot(FILE *log,int nl_type,t_forcerec *fr,
    
    nlist = &fr->nblists[0].nlist_sr[nl_type];
    
-  c_tabpot(fr->nblists[0].tab.scale,fr->nblists[0].tab.tab,
+  c_tabpot(fr->nblists[0].table_elec_vdw.scale,fr->nblists[0].table_elec_vdw.data,
            nlist->nri,nlist->iinr,
            nlist->shift,nlist->jindex,nlist->jjnr,
            x[0],fr->epsfac,mdatoms->chargeA,pot,fr->shift_vec[0]);
@@ -191,7 +191,7 @@ void calc_pot(FILE *logf,t_commrec *cr,
     * also do the calculation of long range forces and energies.
     */
    ns(logf,fr,x,box,&mtop->groups,&(inputrec->opts),top,mdatoms,cr,
-     &nrnb,&lam[0],&dum[0],&enerd->grpp,TRUE,FALSE,FALSE,NULL);
+     &nrnb,&lam[0],&dum[0],&enerd->grpp,TRUE,FALSE);
    for(m=0; (m<DIM); m++)
      box_size[m] = box[m][m];
    for(i=0; (i<mdatoms->nr); i++)
diff --git a/src/tools/gmx_h2order.c b/src/tools/gmx_h2order.c

index 73435a7941a88b68f99f74e636c2f4fcc874023e..871730bf4a7857bf25ef7794b654d7dccd34721b 100644 (file)
--- a/src/tools/gmx_h2order.c
+++ b/src/tools/gmx_h2order.c
@@ -307,8 +307,8 @@ int gmx_h2order(int argc,char *argv[])
    
    if (bMicel)
      rd_index(opt2fn("-nm",NFILE,fnm), 1, &nmic, &micelle, &micname);
-
-  calc_h2order(ftp2fn(efTRX,NFILE,fnm), index, ngx, &slDipole, &slOrder, 
+    
+  calc_h2order(ftp2fn(efTRX,NFILE,fnm), index, ngx, &slDipole, &slOrder,
                &slWidth, &nslices, top, ePBC, axis, bMicel, micelle, nmic,
                 oenv);
author	Erik Lindahl <erik@kth.se>
	Sun, 28 Oct 2012 18:45:52 +0000 (14:45 -0400)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 8 Nov 2012 07:42:10 +0000 (08:42 +0100)