SIMD acceleration for RB dihedrals

[alexxy/gromacs.git] / src / gromacs / gmxlib / bondfree.c
diff --git a/src/gromacs/gmxlib/bondfree.c b/src/gromacs/gmxlib/bondfree.c

index dfc6bf2fc57b00328b0a2744cad01262078b8a03..0eb8d55791b983c0b5909786b7dffdb1c5ef462a 100644 (file)
--- a/src/gromacs/gmxlib/bondfree.c
+++ b/src/gromacs/gmxlib/bondfree.c
@@ -45,7 +45,7 @@
  #include "gromacs/math/utilities.h"
  #include "txtdump.h"
  #include "bondf.h"
-#include "smalloc.h"
+#include "gromacs/utility/smalloc.h"
  #include "pbc.h"
  #include "ns.h"
  #include "macros.h"
@@ -733,7 +733,8 @@ real water_pol(int nbonds,
       * a shell connected to a dummy with spring constant that differ in the
       * three spatial dimensions in the molecular frame.
       */
-    int  i, m, aO, aH1, aH2, aD, aS, type, type0;
+    int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
+    ivec dt;
      rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
  #ifdef DEBUG
      rvec df;
@@ -776,11 +777,11 @@ real water_pol(int nbonds,
              aS   = forceatoms[i+5];
  
              /* Compute vectors describing the water frame */
-            rvec_sub(x[aH1], x[aO], dOH1);
-            rvec_sub(x[aH2], x[aO], dOH2);
-            rvec_sub(x[aH2], x[aH1], dHH);
-            rvec_sub(x[aD], x[aO], dOD);
-            rvec_sub(x[aS], x[aD], dDS);
+            pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
+            pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
+            pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
+            pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
+            ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
              cprod(dOH1, dOH2, nW);
  
              /* Compute inverse length of normal vector
@@ -835,6 +836,13 @@ real water_pol(int nbonds,
              kdx[YY] = kk[YY]*dx[YY];
              kdx[ZZ] = kk[ZZ]*dx[ZZ];
              vtot   += iprod(dx, kdx);
+
+            if (g)
+            {
+                ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
+                ki = IVEC2IS(dt);
+            }
+
              for (m = 0; (m < DIM); m++)
              {
                  /* This is a tensor operation but written out for speed */
@@ -845,8 +853,10 @@ real water_pol(int nbonds,
  #ifdef DEBUG
                  df[m] = fij;
  #endif
-                f[aS][m] += fij;
-                f[aD][m] -= fij;
+                f[aS][m]           += fij;
+                f[aD][m]           -= fij;
+                fshift[ki][m]      += fij;
+                fshift[CENTRAL][m] -= fij;
              }
  #ifdef DEBUG
              if (debug)
@@ -1979,7 +1989,6 @@ pdihs_noener_simd(int nbonds,
      const int             nfa1 = 5;
      int                   i, iu, s;
      int                   type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
-    int                   t1[GMX_SIMD_REAL_WIDTH], t2[GMX_SIMD_REAL_WIDTH], t3[GMX_SIMD_REAL_WIDTH];
      real                  ddphi;
      real                  dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
      real                  buf_array[7*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
@@ -2094,6 +2103,155 @@ pdihs_noener_simd(int nbonds,
      }
  }
  
+/* This is mostly a copy of pdihs_noener_simd above, but with using
+ * the RB potential instead of a harmonic potential.
+ * This function can replace rbdihs() when no energy and virial are needed.
+ */
+static void
+rbdihs_noener_simd(int nbonds,
+                   const t_iatom forceatoms[], const t_iparams forceparams[],
+                   const rvec x[], rvec f[],
+                   const t_pbc *pbc, const t_graph gmx_unused *g,
+                   real gmx_unused lambda,
+                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
+                   int gmx_unused *global_atom_index)
+{
+    const int             nfa1 = 5;
+    int                   i, iu, s, j;
+    int                   type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
+    real                  ddphi;
+    real                  dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
+    real                  buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
+    real                 *parm, *phi, *p, *q, *sf_i, *msf_l;
+
+    gmx_simd_real_t       phi_S;
+    gmx_simd_real_t       ddphi_S, cosfac_S;
+    gmx_simd_real_t       mx_S, my_S, mz_S;
+    gmx_simd_real_t       nx_S, ny_S, nz_S;
+    gmx_simd_real_t       nrkj_m2_S, nrkj_n2_S;
+    gmx_simd_real_t       parm_S, c_S;
+    gmx_simd_real_t       sin_S, cos_S;
+    gmx_simd_real_t       sf_i_S, msf_l_S;
+    pbc_simd_t            pbc_simd;
+
+    gmx_simd_real_t       pi_S  = gmx_simd_set1_r(M_PI);
+    gmx_simd_real_t       one_S = gmx_simd_set1_r(1.0);
+
+    /* Ensure SIMD register alignment */
+    dr  = gmx_simd_align_r(dr_array);
+    buf = gmx_simd_align_r(buf_array);
+
+    /* Extract aligned pointer for parameters and variables */
+    parm  = buf;
+    p     = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH;
+    q     = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH;
+    sf_i  = buf + (NR_RBDIHS + 2)*GMX_SIMD_REAL_WIDTH;
+    msf_l = buf + (NR_RBDIHS + 3)*GMX_SIMD_REAL_WIDTH;
+
+    set_pbc_simd(pbc, &pbc_simd);
+
+    /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
+    for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
+    {
+        /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
+         * iu indexes into forceatoms, we should not let iu go beyond nbonds.
+         */
+        iu = i;
+        for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
+        {
+            type  = forceatoms[iu];
+            ai[s] = forceatoms[iu+1];
+            aj[s] = forceatoms[iu+2];
+            ak[s] = forceatoms[iu+3];
+            al[s] = forceatoms[iu+4];
+
+            /* We don't need the first parameter, since that's a constant
+             * which only affects the energies, not the forces.
+             */
+            for (j = 1; j < NR_RBDIHS; j++)
+            {
+                parm[j*GMX_SIMD_REAL_WIDTH + s] =
+                    forceparams[type].rbdihs.rbcA[j];
+            }
+
+            /* At the end fill the arrays with identical entries */
+            if (iu + nfa1 < nbonds)
+            {
+                iu += nfa1;
+            }
+        }
+
+        /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
+        dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
+                       dr,
+                       &phi_S,
+                       &mx_S, &my_S, &mz_S,
+                       &nx_S, &ny_S, &nz_S,
+                       &nrkj_m2_S,
+                       &nrkj_n2_S,
+                       p, q);
+
+        /* Change to polymer convention */
+        phi_S = gmx_simd_sub_r(phi_S, pi_S);
+
+        gmx_simd_sincos_r(phi_S, &sin_S, &cos_S);
+
+        ddphi_S   = gmx_simd_setzero_r();
+        c_S       = one_S;
+        cosfac_S  = one_S;
+        for (j = 1; j < NR_RBDIHS; j++)
+        {
+            parm_S   = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH);
+            ddphi_S  = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S);
+            cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S);
+            c_S      = gmx_simd_add_r(c_S, one_S);
+        }
+
+        /* Note that here we do not use the minus sign which is present
+         * in the normal RB code. This is corrected for through (m)sf below.
+         */
+        ddphi_S  = gmx_simd_mul_r(ddphi_S, sin_S);
+
+        sf_i_S   = gmx_simd_mul_r(ddphi_S, nrkj_m2_S);
+        msf_l_S  = gmx_simd_mul_r(ddphi_S, nrkj_n2_S);
+
+        /* After this m?_S will contain f[i] */
+        mx_S     = gmx_simd_mul_r(sf_i_S, mx_S);
+        my_S     = gmx_simd_mul_r(sf_i_S, my_S);
+        mz_S     = gmx_simd_mul_r(sf_i_S, mz_S);
+
+        /* After this m?_S will contain -f[l] */
+        nx_S     = gmx_simd_mul_r(msf_l_S, nx_S);
+        ny_S     = gmx_simd_mul_r(msf_l_S, ny_S);
+        nz_S     = gmx_simd_mul_r(msf_l_S, nz_S);
+
+        gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
+        gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
+        gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
+        gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
+        gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
+        gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
+
+        iu = i;
+        s  = 0;
+        do
+        {
+            do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
+                                        p[s], q[s],
+                                        dr[     XX *GMX_SIMD_REAL_WIDTH+s],
+                                        dr[     YY *GMX_SIMD_REAL_WIDTH+s],
+                                        dr[     ZZ *GMX_SIMD_REAL_WIDTH+s],
+                                        dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
+                                        dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
+                                        dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
+                                        f);
+            s++;
+            iu += nfa1;
+        }
+        while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
+    }
+}
+
  #endif /* GMX_SIMD_HAVE_REAL */
  
  
@@ -2200,6 +2358,7 @@ static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
                      /* Box relative coordinates are stored for dimensions with pbc */
                      posA *= pbc->box[m][m];
                      posB *= pbc->box[m][m];
+                    assert(npbcdim <= DIM);
                      for (d = m+1; d < npbcdim; d++)
                      {
                          posA += pos0A[d]*pbc->box[d][m];
@@ -2264,6 +2423,7 @@ real fbposres(int nbonds,
          clear_rvec(com_sc);
          for (m = 0; m < npbcdim; m++)
          {
+            assert(npbcdim <= DIM);
              for (d = m; d < npbcdim; d++)
              {
                  com_sc[m] += com[d]*pbc->box[d][m];
@@ -2389,6 +2549,7 @@ real posres(int nbonds,
          clear_rvec(comB_sc);
          for (m = 0; m < npbcdim; m++)
          {
+            assert(npbcdim <= DIM);
              for (d = m; d < npbcdim; d++)
              {
                  comA_sc[m] += comA[d]*pbc->box[d][m];
@@ -2419,7 +2580,7 @@ real posres(int nbonds,
              vtot       += 0.5*kk*dx[m]*dx[m];
              *dvdlambda +=
                  0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
-                -fm*dpdl[m];
+                + fm*dpdl[m];
  
              /* Here we correct for the pbc_dx which included rdist */
              if (bForceValid)
@@ -4508,6 +4669,19 @@ static real calc_one_bond(FILE *fplog, int thread,
                  global_atom_index);
              v = 0;
          }
+#ifdef GMX_SIMD_HAVE_REAL
+        else if (ftype == F_RBDIHS &&
+                 !bCalcEnerVir && fr->efep == efepNO)
+        {
+            /* No energies, shift forces, dvdl */
+            rbdihs_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
+                               idef->iparams,
+                               (const rvec*)x, f,
+                               pbc, g, lambda[efptFTYPE], md, fcd,
+                               global_atom_index);
+            v = 0;
+        }
+#endif
          else
          {
              v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
@@ -4586,7 +4760,7 @@ void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
      }
      if (bPrintSepPot)
      {
-        fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
+        fprintf(fplog, "Step %s: bonded V and dVdl for this rank\n",
                  gmx_step_str(step, buf));
      }