From: Berk Hess <hess@kth.se>
Date: Fri, 14 Aug 2020 19:23:02 +0000 (+0000)
Subject: Add SIMD bonded tests and improve SIMD angles()
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=fb7a6480db5914a50a97618b3e6316663041aafb;p=alexxy%2Fgromacs.git

Add SIMD bonded tests and improve SIMD angles()

The listed_forces module bonded tests are now also executed for the
force-only and SIMD flavor of the bonded kernels, when available.

Improved precision of SIMD angles kernel for small angles by avoiding
two invsqrt operations for computing the sin.
---

diff --git a/src/gromacs/listed_forces/bonded.cpp b/src/gromacs/listed_forces/bonded.cpp
index 60b42ca235..113c5efd65 100644
--- a/src/gromacs/listed_forces/bonded.cpp
+++ b/src/gromacs/listed_forces/bonded.cpp
@@ -997,6 +997,7 @@ angles(int             nbonds,
     SimdReal                         nrij2_S, nrij_1_S;
     SimdReal                         nrkj2_S, nrkj_1_S;
     SimdReal                         cos_S, invsin_S;
+    SimdReal                         cos2_S;
     SimdReal                         theta_S;
     SimdReal                         st_S, sth_S;
     SimdReal                         cik_S, cii_S, ckk_S;
@@ -1065,6 +1066,16 @@ angles(int             nbonds,
 
         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
 
+        /* We compute cos^2 using a division instead of squaring cos_S,
+         * as we would then get additional error contributions from
+         * the two invsqrt operations, which get amplified by
+         * the 1/sqrt(1-cos^2) for cos values close to 1.
+         *
+         * Note that the non-SIMD version of angles() avoids this issue
+         * by computing the cosine using double precision.
+         */
+        cos2_S = rij_rkj_S * rij_rkj_S / (nrij2_S * nrkj2_S);
+
         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
          * This also ensures that rounding errors would cause the argument
@@ -1076,7 +1087,7 @@ angles(int             nbonds,
 
         theta_S = acos(cos_S);
 
-        invsin_S = invsqrt(one_S - cos_S * cos_S);
+        invsin_S = invsqrt(one_S - cos2_S);
 
         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
         sth_S = st_S * cos_S;
diff --git a/src/gromacs/listed_forces/tests/bonded.cpp b/src/gromacs/listed_forces/tests/bonded.cpp
index f1daf94f52..caaa6e09ad 100644
--- a/src/gromacs/listed_forces/tests/bonded.cpp
+++ b/src/gromacs/listed_forces/tests/bonded.cpp
@@ -51,6 +51,7 @@
 #include <gtest/gtest.h>
 
 #include "gromacs/listed_forces/listed_forces.h"
+#include "gromacs/math/paddedvector.h"
 #include "gromacs/math/units.h"
 #include "gromacs/math/vec.h"
 #include "gromacs/math/vectypes.h"
@@ -85,20 +86,29 @@ struct OutputQuantities
     //! Shift vectors
     rvec fshift[N_IVEC] = { { 0 } };
     //! Forces
-    rvec4 f[c_numAtoms] = { { 0 } };
+    alignas(GMX_REAL_MAX_SIMD_WIDTH * sizeof(real)) rvec4 f[c_numAtoms] = { { 0 } };
 };
 
 /*! \brief Utility to check the output from bonded tests
  *
  * \param[in] checker Reference checker
  * \param[in] output  The output from the test to check
+ * \param[in] bondedKernelFlavor  Flavor for determining what output to check
  */
-void checkOutput(test::TestReferenceChecker* checker, const OutputQuantities& output)
+void checkOutput(test::TestReferenceChecker* checker,
+                 const OutputQuantities&     output,
+                 const BondedKernelFlavor    bondedKernelFlavor)
 {
-    checker->checkReal(output.energy, "Epot ");
-    // Should still be zero if not doing FEP, so may as well test it.
-    checker->checkReal(output.dvdlambda, "dVdlambda ");
-    checker->checkVector(output.fshift[CENTRAL], "Central shift forces");
+    if (computeEnergy(bondedKernelFlavor))
+    {
+        checker->checkReal(output.energy, "Epot ");
+        // Should still be zero when not doing FEP, so may as well test it.
+        checker->checkReal(output.dvdlambda, "dVdlambda ");
+    }
+    if (computeVirial(bondedKernelFlavor))
+    {
+        checker->checkVector(output.fshift[CENTRAL], "Central shift forces");
+    }
     checker->checkSequence(std::begin(output.f), std::end(output.f), "Forces");
 }
 
@@ -514,12 +524,12 @@ void fillIatoms(int ftype, std::vector<t_iatom>* iatoms)
 }
 
 class ListedForcesTest :
-    public ::testing::TestWithParam<std::tuple<iListInput, std::vector<gmx::RVec>, PbcType>>
+    public ::testing::TestWithParam<std::tuple<iListInput, PaddedVector<RVec>, PbcType>>
 {
 protected:
     matrix                     box_;
     t_pbc                      pbc_;
-    std::vector<gmx::RVec>     x_;
+    PaddedVector<RVec>         x_;
     PbcType                    pbcType_;
     iListInput                 input_;
     test::TestReferenceData    refData_;
@@ -545,16 +555,29 @@ protected:
         std::vector<real> chargeA    = { 1.5, -2.0, 1.5, -1.0 };
         t_mdatoms         mdatoms    = { 0 };
         mdatoms.chargeA              = chargeA.data();
-        OutputQuantities output;
-        output.energy = calculateSimpleBond(input_.ftype, iatoms.size(), iatoms.data(),
-                                            &input_.iparams, as_rvec_array(x_.data()), output.f,
-                                            output.fshift, &pbc_, lambda, &output.dvdlambda, &mdatoms,
-                                            /* struct t_fcdata * */ nullptr, ddgatindex.data(),
-                                            BondedKernelFlavor::ForcesAndVirialAndEnergy);
-        // Internal consistency test of both test input
-        // and bonded functions.
-        EXPECT_TRUE((input_.fep || (output.dvdlambda == 0.0))) << "dvdlambda was " << output.dvdlambda;
-        checkOutput(checker, output);
+        /* Here we run both the standard, plain-C force+shift-forcs+energy+free-energy
+         * kernel flavor and the potentially optimized, with SIMD and less output,
+         * force only kernels. Note that we also run the optimized kernel for free-energy
+         * input when lambda=0, as the force output should match the non free-energy case.
+         */
+        std::vector<BondedKernelFlavor> flavors = { BondedKernelFlavor::ForcesAndVirialAndEnergy };
+        if (!input_.fep || lambda == 0)
+        {
+            flavors.push_back(BondedKernelFlavor::ForcesSimdWhenAvailable);
+        }
+        for (const auto flavor : flavors)
+        {
+            OutputQuantities output;
+            output.energy =
+                    calculateSimpleBond(input_.ftype, iatoms.size(), iatoms.data(), &input_.iparams,
+                                        as_rvec_array(x_.data()), output.f, output.fshift, &pbc_,
+                                        lambda, &output.dvdlambda, &mdatoms,
+                                        /* struct t_fcdata * */ nullptr, ddgatindex.data(), flavor);
+            // Internal consistency test of both test input
+            // and bonded functions.
+            EXPECT_TRUE((input_.fep || (output.dvdlambda == 0.0))) << "dvdlambda was " << output.dvdlambda;
+            checkOutput(checker, output, flavor);
+        }
     }
     void testIfunc()
     {
@@ -653,7 +676,7 @@ std::vector<iListInput> c_InputRestraints = {
 };
 
 //! Coordinates for testing
-std::vector<std::vector<gmx::RVec>> c_coordinatesForTests = {
+std::vector<PaddedVector<RVec>> c_coordinatesForTests = {
     { { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.2 }, { 0.005, 0.0, 0.1 }, { -0.001, 0.1, 0.0 } },
     { { 0.5, 0.0, 0.0 }, { 0.5, 0.0, 0.15 }, { 0.5, 0.07, 0.22 }, { 0.5, 0.18, 0.22 } },
     { { -0.1143, -0.0282, 0.0 }, { 0.0, 0.0434, 0.0 }, { 0.1185, -0.0138, 0.0 }, { -0.0195, 0.1498, 0.0 } }