From: Berk Hess Date: Fri, 14 Aug 2020 19:23:02 +0000 (+0000) Subject: Add SIMD bonded tests and improve SIMD angles() X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=fb7a6480db5914a50a97618b3e6316663041aafb;p=alexxy%2Fgromacs.git Add SIMD bonded tests and improve SIMD angles() The listed_forces module bonded tests are now also executed for the force-only and SIMD flavor of the bonded kernels, when available. Improved precision of SIMD angles kernel for small angles by avoiding two invsqrt operations for computing the sin. --- diff --git a/src/gromacs/listed_forces/bonded.cpp b/src/gromacs/listed_forces/bonded.cpp index 60b42ca235..113c5efd65 100644 --- a/src/gromacs/listed_forces/bonded.cpp +++ b/src/gromacs/listed_forces/bonded.cpp @@ -997,6 +997,7 @@ angles(int nbonds, SimdReal nrij2_S, nrij_1_S; SimdReal nrkj2_S, nrkj_1_S; SimdReal cos_S, invsin_S; + SimdReal cos2_S; SimdReal theta_S; SimdReal st_S, sth_S; SimdReal cik_S, cii_S, ckk_S; @@ -1065,6 +1066,16 @@ angles(int nbonds, cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S; + /* We compute cos^2 using a division instead of squaring cos_S, + * as we would then get additional error contributions from + * the two invsqrt operations, which get amplified by + * the 1/sqrt(1-cos^2) for cos values close to 1. + * + * Note that the non-SIMD version of angles() avoids this issue + * by computing the cosine using double precision. + */ + cos2_S = rij_rkj_S * rij_rkj_S / (nrij2_S * nrkj2_S); + /* To allow for 180 degrees, we take the max of cos and -1 + 1bit, * so we can safely get the 1/sin from 1/sqrt(1 - cos^2). * This also ensures that rounding errors would cause the argument @@ -1076,7 +1087,7 @@ angles(int nbonds, theta_S = acos(cos_S); - invsin_S = invsqrt(one_S - cos_S * cos_S); + invsin_S = invsqrt(one_S - cos2_S); st_S = k_S * (theta0_S - theta_S) * invsin_S; sth_S = st_S * cos_S; diff --git a/src/gromacs/listed_forces/tests/bonded.cpp b/src/gromacs/listed_forces/tests/bonded.cpp index f1daf94f52..caaa6e09ad 100644 --- a/src/gromacs/listed_forces/tests/bonded.cpp +++ b/src/gromacs/listed_forces/tests/bonded.cpp @@ -51,6 +51,7 @@ #include #include "gromacs/listed_forces/listed_forces.h" +#include "gromacs/math/paddedvector.h" #include "gromacs/math/units.h" #include "gromacs/math/vec.h" #include "gromacs/math/vectypes.h" @@ -85,20 +86,29 @@ struct OutputQuantities //! Shift vectors rvec fshift[N_IVEC] = { { 0 } }; //! Forces - rvec4 f[c_numAtoms] = { { 0 } }; + alignas(GMX_REAL_MAX_SIMD_WIDTH * sizeof(real)) rvec4 f[c_numAtoms] = { { 0 } }; }; /*! \brief Utility to check the output from bonded tests * * \param[in] checker Reference checker * \param[in] output The output from the test to check + * \param[in] bondedKernelFlavor Flavor for determining what output to check */ -void checkOutput(test::TestReferenceChecker* checker, const OutputQuantities& output) +void checkOutput(test::TestReferenceChecker* checker, + const OutputQuantities& output, + const BondedKernelFlavor bondedKernelFlavor) { - checker->checkReal(output.energy, "Epot "); - // Should still be zero if not doing FEP, so may as well test it. - checker->checkReal(output.dvdlambda, "dVdlambda "); - checker->checkVector(output.fshift[CENTRAL], "Central shift forces"); + if (computeEnergy(bondedKernelFlavor)) + { + checker->checkReal(output.energy, "Epot "); + // Should still be zero when not doing FEP, so may as well test it. + checker->checkReal(output.dvdlambda, "dVdlambda "); + } + if (computeVirial(bondedKernelFlavor)) + { + checker->checkVector(output.fshift[CENTRAL], "Central shift forces"); + } checker->checkSequence(std::begin(output.f), std::end(output.f), "Forces"); } @@ -514,12 +524,12 @@ void fillIatoms(int ftype, std::vector* iatoms) } class ListedForcesTest : - public ::testing::TestWithParam, PbcType>> + public ::testing::TestWithParam, PbcType>> { protected: matrix box_; t_pbc pbc_; - std::vector x_; + PaddedVector x_; PbcType pbcType_; iListInput input_; test::TestReferenceData refData_; @@ -545,16 +555,29 @@ protected: std::vector chargeA = { 1.5, -2.0, 1.5, -1.0 }; t_mdatoms mdatoms = { 0 }; mdatoms.chargeA = chargeA.data(); - OutputQuantities output; - output.energy = calculateSimpleBond(input_.ftype, iatoms.size(), iatoms.data(), - &input_.iparams, as_rvec_array(x_.data()), output.f, - output.fshift, &pbc_, lambda, &output.dvdlambda, &mdatoms, - /* struct t_fcdata * */ nullptr, ddgatindex.data(), - BondedKernelFlavor::ForcesAndVirialAndEnergy); - // Internal consistency test of both test input - // and bonded functions. - EXPECT_TRUE((input_.fep || (output.dvdlambda == 0.0))) << "dvdlambda was " << output.dvdlambda; - checkOutput(checker, output); + /* Here we run both the standard, plain-C force+shift-forcs+energy+free-energy + * kernel flavor and the potentially optimized, with SIMD and less output, + * force only kernels. Note that we also run the optimized kernel for free-energy + * input when lambda=0, as the force output should match the non free-energy case. + */ + std::vector flavors = { BondedKernelFlavor::ForcesAndVirialAndEnergy }; + if (!input_.fep || lambda == 0) + { + flavors.push_back(BondedKernelFlavor::ForcesSimdWhenAvailable); + } + for (const auto flavor : flavors) + { + OutputQuantities output; + output.energy = + calculateSimpleBond(input_.ftype, iatoms.size(), iatoms.data(), &input_.iparams, + as_rvec_array(x_.data()), output.f, output.fshift, &pbc_, + lambda, &output.dvdlambda, &mdatoms, + /* struct t_fcdata * */ nullptr, ddgatindex.data(), flavor); + // Internal consistency test of both test input + // and bonded functions. + EXPECT_TRUE((input_.fep || (output.dvdlambda == 0.0))) << "dvdlambda was " << output.dvdlambda; + checkOutput(checker, output, flavor); + } } void testIfunc() { @@ -653,7 +676,7 @@ std::vector c_InputRestraints = { }; //! Coordinates for testing -std::vector> c_coordinatesForTests = { +std::vector> c_coordinatesForTests = { { { 0.0, 0.0, 0.0 }, { 0.0, 0.0, 0.2 }, { 0.005, 0.0, 0.1 }, { -0.001, 0.1, 0.0 } }, { { 0.5, 0.0, 0.0 }, { 0.5, 0.0, 0.15 }, { 0.5, 0.07, 0.22 }, { 0.5, 0.18, 0.22 } }, { { -0.1143, -0.0282, 0.0 }, { 0.0, 0.0434, 0.0 }, { 0.1185, -0.0138, 0.0 }, { -0.0195, 0.1498, 0.0 } }