Sort all includes in src/gromacs

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_atomdata.c
diff --git a/src/gromacs/mdlib/nbnxn_atomdata.c b/src/gromacs/mdlib/nbnxn_atomdata.c

index 157f788e3fa539a6b88cea65eafc3bed76c61ff8..ca0f4999bc7fb28600001e0ff32914f15114f9ec 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_atomdata.c
+++ b/src/gromacs/mdlib/nbnxn_atomdata.c
@@ -33,21 +33,29 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "nbnxn_atomdata.h"
  
+#include "config.h"
+
+#include <assert.h>
  #include <math.h>
+#include <stdlib.h>
  #include <string.h>
-#include <assert.h>
-#include "smalloc.h"
-#include "macros.h"
-#include "vec.h"
-#include "nbnxn_consts.h"
-#include "nbnxn_internal.h"
-#include "nbnxn_search.h"
+
+#include "thread_mpi/atomic.h"
+
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_internal.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/pbcutil/ishift.h"
  #include "gromacs/utility/gmxomp.h"
-#include "gmx_omp_nthreads.h"
+#include "gromacs/utility/smalloc.h"
  
  /* Default nbnxn allocation routine, allocates NBNXN_MEM_ALIGN byte aligned */
  void nbnxn_alloc_aligned(void **ptr, size_t nbytes)
@@ -359,22 +367,47 @@ void copy_rvec_to_nbat_real(const int *a, int na, int na_round,
      }
  }
  
-/* Stores the LJ parameter data in a format convenient for the SIMD kernels */
-static void set_ljparam_simd_data(nbnxn_atomdata_t *nbat)
+/* Stores the LJ parameter data in a format convenient for different kernels */
+static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD)
  {
      int  nt, i, j;
      real c6, c12;
  
      nt = nbat->ntype;
  
+    if (bSIMD)
+    {
+        /* nbfp_s4 stores two parameters using a stride of 4,
+         * because this would suit x86 SIMD single-precision
+         * quad-load intrinsics. There's a slight inefficiency in
+         * allocating and initializing nbfp_s4 when it might not
+         * be used, but introducing the conditional code is not
+         * really worth it. */
+        nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
+        for (i = 0; i < nt; i++)
+        {
+            for (j = 0; j < nt; j++)
+            {
+                nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0];
+                nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1];
+                nbat->nbfp_s4[(i*nt+j)*4+2] = 0;
+                nbat->nbfp_s4[(i*nt+j)*4+3] = 0;
+            }
+        }
+    }
+
+    /* We use combination rule data for SIMD combination rule kernels
+     * and with LJ-PME kernels. We then only need parameters per atom type,
+     * not per pair of atom types.
+     */
      switch (nbat->comb_rule)
      {
-        case  ljcrGEOM:
+        case ljcrGEOM:
              nbat->comb_rule = ljcrGEOM;
  
              for (i = 0; i < nt; i++)
              {
-                /* Copy the diagonal from the nbfp matrix */
+                /* Store the sqrt of the diagonal from the nbfp matrix */
                  nbat->nbfp_comb[i*2  ] = sqrt(nbat->nbfp[(i*nt+i)*2  ]);
                  nbat->nbfp_comb[i*2+1] = sqrt(nbat->nbfp[(i*nt+i)*2+1]);
              }
@@ -401,23 +434,7 @@ static void set_ljparam_simd_data(nbnxn_atomdata_t *nbat)
              }
              break;
          case ljcrNONE:
-            /* nbfp_s4 stores two parameters using a stride of 4,
-             * because this would suit x86 SIMD single-precision
-             * quad-load intrinsics. There's a slight inefficiency in
-             * allocating and initializing nbfp_s4 when it might not
-             * be used, but introducing the conditional code is not
-             * really worth it. */
-            nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
-            for (i = 0; i < nt; i++)
-            {
-                for (j = 0; j < nt; j++)
-                {
-                    nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0];
-                    nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1];
-                    nbat->nbfp_s4[(i*nt+j)*4+2] = 0;
-                    nbat->nbfp_s4[(i*nt+j)*4+3] = 0;
-                }
-            }
+            /* We always store the full matrix (see code above) */
              break;
          default:
              gmx_incons("Unknown combination rule");
@@ -514,7 +531,7 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat)
  void nbnxn_atomdata_init(FILE *fp,
                           nbnxn_atomdata_t *nbat,
                           int nb_kernel_type,
-                         gmx_bool bTryCombinationRule,
+                         int enbnxninitcombrule,
                           int ntype, const real *nbfp,
                           int n_energygroups,
                           int nout,
@@ -524,7 +541,7 @@ void nbnxn_atomdata_init(FILE *fp,
      int      i, j, nth;
      real     c6, c12, tol;
      char    *ptr;
-    gmx_bool simple, bCombGeom, bCombLB;
+    gmx_bool simple, bCombGeom, bCombLB, bSIMD;
  
      if (alloc == NULL)
      {
@@ -636,51 +653,60 @@ void nbnxn_atomdata_init(FILE *fp,
  
      simple = nbnxn_kernel_pairlist_simple(nb_kernel_type);
  
-    if (bTryCombinationRule)
+    switch (enbnxninitcombrule)
      {
-        /* We prefer the geometic combination rule,
-         * as that gives a slightly faster kernel than the LB rule.
-         */
-        if (bCombGeom)
-        {
-            nbat->comb_rule = ljcrGEOM;
-        }
-        else if (bCombLB)
-        {
-            nbat->comb_rule = ljcrLB;
-        }
-        else
-        {
-            nbat->comb_rule = ljcrNONE;
-
-            nbat->free(nbat->nbfp_comb);
-        }
-
-        if (fp)
-        {
-            if (nbat->comb_rule == ljcrNONE)
+        case enbnxninitcombruleDETECT:
+            /* We prefer the geometric combination rule,
+             * as that gives a slightly faster kernel than the LB rule.
+             */
+            if (bCombGeom)
+            {
+                nbat->comb_rule = ljcrGEOM;
+            }
+            else if (bCombLB)
              {
-                fprintf(fp, "Using full Lennard-Jones parameter combination matrix\n\n");
+                nbat->comb_rule = ljcrLB;
              }
              else
              {
-                fprintf(fp, "Using %s Lennard-Jones combination rule\n\n",
-                        nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+                nbat->comb_rule = ljcrNONE;
+
+                nbat->free(nbat->nbfp_comb);
              }
-        }
-    }
-    else
-    {
-        nbat->comb_rule = ljcrNONE;
  
-        nbat->free(nbat->nbfp_comb);
-    }
+            if (fp)
+            {
+                if (nbat->comb_rule == ljcrNONE)
+                {
+                    fprintf(fp, "Using full Lennard-Jones parameter combination matrix\n\n");
+                }
+                else
+                {
+                    fprintf(fp, "Using %s Lennard-Jones combination rule\n\n",
+                            nbat->comb_rule == ljcrGEOM ? "geometric" : "Lorentz-Berthelot");
+                }
+            }
+            break;
+        case enbnxninitcombruleGEOM:
+            nbat->comb_rule = ljcrGEOM;
+            break;
+        case enbnxninitcombruleLB:
+            nbat->comb_rule = ljcrLB;
+            break;
+        case enbnxninitcombruleNONE:
+            nbat->comb_rule = ljcrNONE;
  
-    if (simple)
-    {
-        set_ljparam_simd_data(nbat);
+            nbat->free(nbat->nbfp_comb);
+            break;
+        default:
+            gmx_incons("Unknown enbnxninitcombrule");
      }
  
+    bSIMD = (nb_kernel_type == nbnxnk4xN_SIMD_4xN ||
+             nb_kernel_type == nbnxnk4xN_SIMD_2xNN);
+
+    set_lj_parameter_data(nbat, bSIMD);
+
      nbat->natoms  = 0;
      nbat->type    = NULL;
      nbat->lj_comb = NULL;
@@ -688,27 +714,25 @@ void nbnxn_atomdata_init(FILE *fp,
      {
          int pack_x;
  
-        switch (nb_kernel_type)
+        if (bSIMD)
          {
-            case nbnxnk4xN_SIMD_4xN:
-            case nbnxnk4xN_SIMD_2xNN:
-                pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE,
-                             nbnxn_kernel_to_cj_size(nb_kernel_type));
-                switch (pack_x)
-                {
-                    case 4:
-                        nbat->XFormat = nbatX4;
-                        break;
-                    case 8:
-                        nbat->XFormat = nbatX8;
-                        break;
-                    default:
-                        gmx_incons("Unsupported packing width");
-                }
-                break;
-            default:
-                nbat->XFormat = nbatXYZ;
-                break;
+            pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE,
+                         nbnxn_kernel_to_cj_size(nb_kernel_type));
+            switch (pack_x)
+            {
+                case 4:
+                    nbat->XFormat = nbatX4;
+                    break;
+                case 8:
+                    nbat->XFormat = nbatX8;
+                    break;
+                default:
+                    gmx_incons("Unsupported packing width");
+            }
+        }
+        else
+        {
+            nbat->XFormat = nbatXYZ;
          }
  
          nbat->FFormat = nbat->XFormat;
@@ -833,7 +857,7 @@ static void copy_lj_to_nbat_lj_comb_x8(const real *ljparam_type,
      }
  }
  
-/* Sets the atom type and LJ data in nbnxn_atomdata_t */
+/* Sets the atom type in nbnxn_atomdata_t */
  static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t    *nbat,
                                           int                  ngrid,
                                           const nbnxn_search_t nbs,
@@ -854,9 +878,30 @@ static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t    *nbat,
  
              copy_int_to_nbat_int(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc,
                                   type, nbat->ntype-1, nbat->type+ash);
+        }
+    }
+}
  
-            if (nbat->comb_rule != ljcrNONE)
+/* Sets the LJ combination rule parameters in nbnxn_atomdata_t */
+static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t    *nbat,
+                                            int                  ngrid,
+                                            const nbnxn_search_t nbs)
+{
+    int                 g, i, ncz, ash;
+    const nbnxn_grid_t *grid;
+
+    if (nbat->comb_rule != ljcrNONE)
+    {
+        for (g = 0; g < ngrid; g++)
+        {
+            grid = &nbs->grid[g];
+
+            /* Loop over all columns and copy and fill */
+            for (i = 0; i < grid->ncx*grid->ncy; i++)
              {
+                ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i];
+                ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc;
+
                  if (nbat->XFormat == nbatX4)
                  {
                      copy_lj_to_nbat_lj_comb_x4(nbat->nbfp_comb,
@@ -929,6 +974,67 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t    *nbat,
      }
  }
  
+/* Set the charge of perturbed atoms in nbnxn_atomdata_t to 0 and their atom
+ * type to ntype-1 (non-interacting). The zero charge automatically removes the
+ * RF/PME self term in the nbnxn kernels. Part of the zero interactions are
+ * still calculated in the normal kernels. All perturbed interactions are done
+ * in the free energy kernel with the original charge and LJ data, not nbat.
+ */
+static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t    *nbat,
+                                    int                  ngrid,
+                                    const nbnxn_search_t nbs)
+{
+    real               *q;
+    int                 stride_q, g, nsubc, c_offset, c, subc, i, ind;
+    const nbnxn_grid_t *grid;
+
+    if (nbat->XFormat == nbatXYZQ)
+    {
+        q        = nbat->x + ZZ + 1; /* presumably the q slot after x,y,z - TODO confirm */
+        stride_q = STRIDE_XYZQ;
+    }
+    else
+    {
+        q        = nbat->q;
+        stride_q = 1;
+    }
+
+    for (g = 0; g < ngrid; g++)
+    {
+        grid = &nbs->grid[g];
+        if (grid->bSimple)
+        {
+            nsubc = 1;
+        }
+        else
+        {
+            nsubc = GPU_NSUBCELL;
+        }
+
+        c_offset = grid->cell0*grid->na_sc; /* index of the first atom of this grid */
+
+        /* Loop over all nc*nsubc clusters of this grid and mask perturbed atoms */
+        for (c = 0; c < grid->nc*nsubc; c++)
+        {
+            /* Does this cluster contain perturbed particles? */
+            if (grid->fep[c] != 0)
+            {
+                for (i = 0; i < grid->na_c; i++)
+                {
+                    /* Is this a perturbed particle? (bit i of fep[c] flags atom i) */
+                    if (grid->fep[c] & (1 << i))
+                    {
+                        ind = c_offset + c*grid->na_c + i;
+                        /* Set atom type and charge to non-interacting */
+                        nbat->type[ind] = nbat->ntype - 1;
+                        q[ind*stride_q] = 0;
+                    }
+                }
+            }
+        }
+    }
+}
+
  /* Copies the energy group indices to a reordered and packed array */
  static void copy_egp_to_nbat_egps(const int *a, int na, int na_round,
                                    int na_c, int bit_shift,
@@ -968,6 +1074,11 @@ static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t    *nbat,
      int                 g, i, ncz, ash;
      const nbnxn_grid_t *grid;
  
+    if (nbat->nenergrp == 1)
+    {
+        return;
+    }
+
      for (g = 0; g < ngrid; g++)
      {
          grid = &nbs->grid[g];
@@ -1007,10 +1118,15 @@ void nbnxn_atomdata_set(nbnxn_atomdata_t    *nbat,
  
      nbnxn_atomdata_set_charges(nbat, ngrid, nbs, mdatoms->chargeA);
  
-    if (nbat->nenergrp > 1)
+    if (nbs->bFEP)
      {
-        nbnxn_atomdata_set_energygroups(nbat, ngrid, nbs, atinfo);
+        nbnxn_atomdata_mask_fep(nbat, ngrid, nbs);
      }
+
+    /* This must be done after masking types for FEP */
+    nbnxn_atomdata_set_ljcombparams(nbat, ngrid, nbs);
+
+    nbnxn_atomdata_set_energygroups(nbat, ngrid, nbs, atinfo);
  }
  
  /* Copies the shift vector array to nbnxn_atomdata_t */