Avoid comparing Simd types to real 0 when making masks in NB FEP kernel.
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_free_energy.cpp
index 33e485d191f139b7152b1d28cf80e66f5d1520b5..85f998cfc546412a2fafffbac2623f846656fe4a 100644 (file)
@@ -304,14 +304,21 @@ static void nb_free_energy_kernel(const t_nblist&
     gmx::ArrayRef<const int> shift  = nlist.shift;
     gmx::ArrayRef<const int> gid    = nlist.gid;
 
-    const real  lambda_coul = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
-    const real  lambda_vdw  = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
-    const auto& scParams    = *ic.softCoreParameters;
-    const real gmx_unused alpha_coul    = scParams.alphaCoulomb;
-    const real gmx_unused alpha_vdw     = scParams.alphaVdw;
-    const real            lam_power     = scParams.lambdaPower;
-    const real gmx_unused sigma6_def    = scParams.sigma6WithInvalidSigma;
-    const real gmx_unused sigma6_min    = scParams.sigma6Minimum;
+    const real lambda_coul = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Coul)];
+    const real lambda_vdw  = lambda[static_cast<int>(FreeEnergyPerturbationCouplingType::Vdw)];
+
+    // Extract softcore parameters
+    const auto&           scParams   = *ic.softCoreParameters;
+    const real            lam_power  = scParams.lambdaPower;
+    const real gmx_unused alpha_coul = scParams.alphaCoulomb;
+    const real gmx_unused alpha_vdw  = scParams.alphaVdw;
+    const real gmx_unused sigma6_def = scParams.sigma6WithInvalidSigma;
+    const real gmx_unused sigma6_min = scParams.sigma6Minimum;
+
+    const real gmx_unused gapsysScaleLinpointCoul = scParams.gapsysScaleLinpointCoul;
+    const real gmx_unused gapsysScaleLinpointVdW  = scParams.gapsysScaleLinpointVdW;
+    const real gmx_unused gapsysSigma6VdW         = scParams.gapsysSigma6VdW;
+
     const bool gmx_unused doShiftForces = ((flags & GMX_NONBONDED_DO_SHIFTFORCE) != 0);
     const bool            doPotential   = ((flags & GMX_NONBONDED_DO_POTENTIAL) != 0);
 
@@ -394,8 +401,8 @@ static void nb_free_energy_kernel(const t_nblist&
     const RealType            maxRInvSix(c_maxRInvSix);
     const RealType gmx_unused floatMin(GMX_FLOAT_MIN);
 
-    RealType dvdlCoul(zero);
-    RealType dvdlVdw(zero);
+    RealType dvdlCoul{ zero };
+    RealType dvdlVdw{ zero };
 
     /* Lambda factor for state A, 1-lambda*/
     real LFC[NSTATES], LFV[NSTATES];
@@ -472,10 +479,10 @@ static void nb_free_energy_kernel(const t_nblist&
         int            preloadIi[DataTypes::simdRealWidth];
         int gmx_unused preloadIs[DataTypes::simdRealWidth];
 #endif
-        for (int s = 0; s < DataTypes::simdRealWidth; s++)
+        for (int i = 0; i < DataTypes::simdRealWidth; i++)
         {
-            preloadIi[s] = ii;
-            preloadIs[s] = shift[n];
+            preloadIi[i] = ii;
+            preloadIs[i] = shift[n];
         }
         IntType ii_s = gmx::load<IntType>(preloadIi);
 
@@ -492,6 +499,12 @@ static void nb_free_energy_kernel(const t_nblist&
             alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
             alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
             alignas(GMX_SIMD_ALIGNMENT) real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
+            alignas(GMX_SIMD_ALIGNMENT)
+                    real gmx_unused preloadGapsysScaleLinpointVdW[DataTypes::simdRealWidth];
+            alignas(GMX_SIMD_ALIGNMENT)
+                    real gmx_unused preloadGapsysScaleLinpointCoul[DataTypes::simdRealWidth];
+            alignas(GMX_SIMD_ALIGNMENT)
+                    real gmx_unused preloadGapsysSigma6VdW[NSTATES][DataTypes::simdRealWidth];
             alignas(GMX_SIMD_ALIGNMENT) real preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
 #else
             real            preloadPairIsValid[DataTypes::simdRealWidth];
@@ -502,94 +515,120 @@ static void nb_free_energy_kernel(const t_nblist&
             real gmx_unused preloadSigma6[NSTATES][DataTypes::simdRealWidth];
             real gmx_unused preloadAlphaVdwEff[DataTypes::simdRealWidth];
             real gmx_unused preloadAlphaCoulEff[DataTypes::simdRealWidth];
+            real gmx_unused preloadGapsysScaleLinpointVdW[DataTypes::simdRealWidth];
+            real gmx_unused preloadGapsysScaleLinpointCoul[DataTypes::simdRealWidth];
+            real gmx_unused preloadGapsysSigma6VdW[NSTATES][DataTypes::simdRealWidth];
             real            preloadLjPmeC6Grid[NSTATES][DataTypes::simdRealWidth];
 #endif
-            for (int s = 0; s < DataTypes::simdRealWidth; s++)
+            for (int j = 0; j < DataTypes::simdRealWidth; j++)
             {
-                if (k + s < nj1)
+                if (k + j < nj1)
                 {
-                    preloadPairIsValid[s] = true;
+                    preloadPairIsValid[j] = true;
                     /* Check if this pair on the exclusions list.*/
-                    preloadPairIncluded[s]  = (nlist.excl_fep.empty() || nlist.excl_fep[k + s]);
-                    const int jnr           = jjnr[k + s];
-                    preloadJnr[s]           = jnr;
-                    typeIndices[STATE_A][s] = ntiA + typeA[jnr];
-                    typeIndices[STATE_B][s] = ntiB + typeB[jnr];
-                    preloadQq[STATE_A][s]   = iqA * chargeA[jnr];
-                    preloadQq[STATE_B][s]   = iqB * chargeB[jnr];
+                    preloadPairIncluded[j]  = (nlist.excl_fep.empty() || nlist.excl_fep[k + j]);
+                    const int jnr           = jjnr[k + j];
+                    preloadJnr[j]           = jnr;
+                    typeIndices[STATE_A][j] = ntiA + typeA[jnr];
+                    typeIndices[STATE_B][j] = ntiB + typeB[jnr];
+                    preloadQq[STATE_A][j]   = iqA * chargeA[jnr];
+                    preloadQq[STATE_B][j]   = iqB * chargeB[jnr];
 
                     for (int i = 0; i < NSTATES; i++)
                     {
                         if constexpr (vdwInteractionTypeIsEwald)
                         {
-                            preloadLjPmeC6Grid[i][s] = nbfp_grid[2 * typeIndices[i][s]];
+                            preloadLjPmeC6Grid[i][j] = nbfp_grid[2 * typeIndices[i][j]];
                         }
                         else
                         {
-                            preloadLjPmeC6Grid[i][s] = 0;
+                            preloadLjPmeC6Grid[i][j] = 0;
                         }
-                        if constexpr (softcoreType == KernelSoftcoreType::Beutler
-                                      || softcoreType == KernelSoftcoreType::Gapsys)
+                        if constexpr (softcoreType == KernelSoftcoreType::Beutler)
                         {
-                            const real c6  = nbfp[2 * typeIndices[i][s]];
-                            const real c12 = nbfp[2 * typeIndices[i][s] + 1];
+                            const real c6  = nbfp[2 * typeIndices[i][j]];
+                            const real c12 = nbfp[2 * typeIndices[i][j] + 1];
                             if (c6 > 0 && c12 > 0)
                             {
                                 /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
-                                preloadSigma6[i][s] = 0.5_real * c12 / c6;
-                                if (preloadSigma6[i][s]
+                                preloadSigma6[i][j] = 0.5_real * c12 / c6;
+                                if (preloadSigma6[i][j]
                                     < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
                                 {
-                                    preloadSigma6[i][s] = sigma6_min;
+                                    preloadSigma6[i][j] = sigma6_min;
                                 }
                             }
                             else
                             {
-                                preloadSigma6[i][s] = sigma6_def;
+                                preloadSigma6[i][j] = sigma6_def;
+                            }
+                        }
+                        if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+                        {
+                            const real c6  = nbfp[2 * typeIndices[i][j]];
+                            const real c12 = nbfp[2 * typeIndices[i][j] + 1];
+                            if (c6 > 0 && c12 > 0)
+                            {
+                                /* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
+                                preloadGapsysSigma6VdW[i][j] = 0.5_real * c12 / c6;
+                            }
+                            else
+                            {
+                                preloadGapsysSigma6VdW[i][j] = gapsysSigma6VdW;
                             }
                         }
                     }
-                    if constexpr (softcoreType == KernelSoftcoreType::Beutler
-                                  || softcoreType == KernelSoftcoreType::Gapsys)
+                    if constexpr (softcoreType == KernelSoftcoreType::Beutler)
                     {
                         /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
-                        const real c12A = nbfp[2 * typeIndices[STATE_A][s] + 1];
-                        const real c12B = nbfp[2 * typeIndices[STATE_B][s] + 1];
+                        const real c12A = nbfp[2 * typeIndices[STATE_A][j] + 1];
+                        const real c12B = nbfp[2 * typeIndices[STATE_B][j] + 1];
                         if (c12A > 0 && c12B > 0)
                         {
-                            preloadAlphaVdwEff[s]  = 0;
-                            preloadAlphaCoulEff[s] = 0;
+                            preloadAlphaVdwEff[j]  = 0;
+                            preloadAlphaCoulEff[j] = 0;
                         }
                         else
                         {
-                            if constexpr (softcoreType == KernelSoftcoreType::Beutler)
-                            {
-                                preloadAlphaVdwEff[s]  = alpha_vdw;
-                                preloadAlphaCoulEff[s] = alpha_coul;
-                            }
-                            else if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
-                            {
-                                preloadAlphaVdwEff[s]  = alpha_vdw;
-                                preloadAlphaCoulEff[s] = gmx::sixthroot(sigma6_def);
-                            }
+                            preloadAlphaVdwEff[j]  = alpha_vdw;
+                            preloadAlphaCoulEff[j] = alpha_coul;
+                        }
+                    }
+                    if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+                    {
+                        /* only use softcore if one of the states has a zero endstate - softcore is for avoiding infinities!*/
+                        const real c12A = nbfp[2 * typeIndices[STATE_A][j] + 1];
+                        const real c12B = nbfp[2 * typeIndices[STATE_B][j] + 1];
+                        if (c12A > 0 && c12B > 0)
+                        {
+                            preloadGapsysScaleLinpointVdW[j]  = 0;
+                            preloadGapsysScaleLinpointCoul[j] = 0;
+                        }
+                        else
+                        {
+                            preloadGapsysScaleLinpointVdW[j]  = gapsysScaleLinpointVdW;
+                            preloadGapsysScaleLinpointCoul[j] = gapsysScaleLinpointCoul;
                         }
                     }
                 }
                 else
                 {
-                    preloadJnr[s]          = jjnr[k];
-                    preloadPairIsValid[s]  = false;
-                    preloadPairIncluded[s] = false;
-                    preloadAlphaVdwEff[s]  = 0;
-                    preloadAlphaCoulEff[s] = 0;
-
+                    preloadJnr[j]                     = jjnr[k];
+                    preloadPairIsValid[j]             = false;
+                    preloadPairIncluded[j]            = false;
+                    preloadAlphaVdwEff[j]             = 0;
+                    preloadAlphaCoulEff[j]            = 0;
+                    preloadGapsysScaleLinpointVdW[j]  = 0;
+                    preloadGapsysScaleLinpointCoul[j] = 0;
+
+                    typeIndices[STATE_A][j] = ntiA + typeA[jjnr[k]];
+                    typeIndices[STATE_B][j] = ntiB + typeB[jjnr[k]];
                     for (int i = 0; i < NSTATES; i++)
                     {
-                        typeIndices[STATE_A][s]  = ntiA + typeA[jjnr[k]];
-                        typeIndices[STATE_B][s]  = ntiB + typeB[jjnr[k]];
-                        preloadLjPmeC6Grid[i][s] = 0;
-                        preloadQq[i][s]          = 0;
-                        preloadSigma6[i][s]      = 0;
+                        preloadLjPmeC6Grid[i][j]     = 0;
+                        preloadQq[i][j]              = 0;
+                        preloadSigma6[i][j]          = 0;
+                        preloadGapsysSigma6VdW[i][j] = 0;
                     }
                 }
             }
@@ -641,23 +680,33 @@ static void nb_free_energy_kernel(const t_nblist&
             RealType gmx_unused ljPmeC6Grid[NSTATES];
             RealType gmx_unused alphaVdwEff;
             RealType gmx_unused alphaCoulEff;
+            RealType gmx_unused gapsysScaleLinpointVdWEff;
+            RealType gmx_unused gapsysScaleLinpointCoulEff;
+            RealType gmx_unused gapsysSigma6VdWEff[NSTATES];
             for (int i = 0; i < NSTATES; i++)
             {
                 gmx::gatherLoadTranspose<2>(nbfp.data(), typeIndices[i], &c6[i], &c12[i]);
                 qq[i]          = gmx::load<RealType>(preloadQq[i]);
                 ljPmeC6Grid[i] = gmx::load<RealType>(preloadLjPmeC6Grid[i]);
-                if constexpr (softcoreType == KernelSoftcoreType::Beutler
-                              || softcoreType == KernelSoftcoreType::Gapsys)
+                if constexpr (softcoreType == KernelSoftcoreType::Beutler)
                 {
                     sigma6[i] = gmx::load<RealType>(preloadSigma6[i]);
                 }
+                if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+                {
+                    gapsysSigma6VdWEff[i] = gmx::load<RealType>(preloadGapsysSigma6VdW[i]);
+                }
             }
-            if constexpr (softcoreType == KernelSoftcoreType::Beutler
-                          || softcoreType == KernelSoftcoreType::Gapsys)
+            if constexpr (softcoreType == KernelSoftcoreType::Beutler)
             {
                 alphaVdwEff  = gmx::load<RealType>(preloadAlphaVdwEff);
                 alphaCoulEff = gmx::load<RealType>(preloadAlphaCoulEff);
             }
+            if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
+            {
+                gapsysScaleLinpointVdWEff  = gmx::load<RealType>(preloadGapsysScaleLinpointVdW);
+                gapsysScaleLinpointCoulEff = gmx::load<RealType>(preloadGapsysScaleLinpointCoul);
+            }
 
             // Avoid overflow of r^-12 at distances near zero
             rSq  = gmx::max(rSq, minDistanceSquared);
@@ -759,18 +808,18 @@ static void nb_free_energy_kernel(const t_nblist&
 
                                 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
                                 {
-                                    ewaldQuadraticPotential(qq[i],
-                                                            facel,
-                                                            rC,
-                                                            rCutoffCoul,
-                                                            LFC[i],
-                                                            DLF[i],
-                                                            alphaCoulEff,
-                                                            sh_ewald,
-                                                            &fScalC[i],
-                                                            &vCoul[i],
-                                                            &dvdlCoul,
-                                                            computeElecInteraction);
+                                    ewaldQuadraticPotential<computeForces>(qq[i],
+                                                                           facel,
+                                                                           rC,
+                                                                           rCutoffCoul,
+                                                                           LFC[i],
+                                                                           DLF[i],
+                                                                           gapsysScaleLinpointCoulEff,
+                                                                           sh_ewald,
+                                                                           &fScalC[i],
+                                                                           &vCoul[i],
+                                                                           &dvdlCoul,
+                                                                           computeElecInteraction);
                                 }
                             }
                             else
@@ -783,19 +832,20 @@ static void nb_free_energy_kernel(const t_nblist&
 
                                 if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
                                 {
-                                    reactionFieldQuadraticPotential(qq[i],
-                                                                    facel,
-                                                                    rC,
-                                                                    rCutoffCoul,
-                                                                    LFC[i],
-                                                                    DLF[i],
-                                                                    alphaCoulEff,
-                                                                    krf,
-                                                                    crf,
-                                                                    &fScalC[i],
-                                                                    &vCoul[i],
-                                                                    &dvdlCoul,
-                                                                    computeElecInteraction);
+                                    reactionFieldQuadraticPotential<computeForces>(
+                                            qq[i],
+                                            facel,
+                                            rC,
+                                            rCutoffCoul,
+                                            LFC[i],
+                                            DLF[i],
+                                            gapsysScaleLinpointCoulEff,
+                                            krf,
+                                            crf,
+                                            &fScalC[i],
+                                            &vCoul[i],
+                                            &dvdlCoul,
+                                            computeElecInteraction);
                                 }
                             }
 
@@ -814,12 +864,12 @@ static void nb_free_energy_kernel(const t_nblist&
                         if constexpr (vdwInteractionTypeIsEwald)
                         {
                             computeVdwInteraction =
-                                    (r < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
+                                    (r < rVdw && (c6[i] != zero || c12[i] != zero) && bPairIncluded);
                         }
                         else
                         {
                             computeVdwInteraction =
-                                    (rV < rVdw && (c6[i] != 0 || c12[i] != 0) && bPairIncluded);
+                                    (rV < rVdw && (c6[i] != zero || c12[i] != zero) && bPairIncluded);
                         }
                         if (gmx::anyTrue(computeVdwInteraction))
                         {
@@ -850,20 +900,20 @@ static void nb_free_energy_kernel(const t_nblist&
 
                             if constexpr (softcoreType == KernelSoftcoreType::Gapsys)
                             {
-                                lennardJonesQuadraticPotential(c6[i],
-                                                               c12[i],
-                                                               r,
-                                                               rSq,
-                                                               LFV[i],
-                                                               DLF[i],
-                                                               sigma6[i],
-                                                               alphaVdwEff,
-                                                               repulsionShift,
-                                                               dispersionShift,
-                                                               &fScalV[i],
-                                                               &vVdw[i],
-                                                               &dvdlVdw,
-                                                               computeVdwInteraction);
+                                lennardJonesQuadraticPotential<computeForces>(c6[i],
+                                                                              c12[i],
+                                                                              r,
+                                                                              rSq,
+                                                                              LFV[i],
+                                                                              DLF[i],
+                                                                              gapsysSigma6VdWEff[i],
+                                                                              gapsysScaleLinpointVdWEff,
+                                                                              repulsionShift,
+                                                                              dispersionShift,
+                                                                              &fScalV[i],
+                                                                              &vVdw[i],
+                                                                              &dvdlVdw,
+                                                                              computeVdwInteraction);
                             }
 
                             if constexpr (vdwInteractionTypeIsEwald)
@@ -1136,7 +1186,7 @@ static KernelFunction dispatchKernelOnUseSimd(const bool useSimd)
 #if GMX_SIMD_HAVE_REAL && GMX_SIMD_HAVE_INT32_ARITHMETICS && GMX_USE_SIMD_KERNELS
         return (nb_free_energy_kernel<SimdDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
 #else
-        return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch computeForces>);
+        return (nb_free_energy_kernel<ScalarDataTypes, softcoreType, scLambdasOrAlphasDiffer, vdwInteractionTypeIsEwald, elecInteractionTypeIsEwald, vdwModifierIsPotSwitch, computeForces>);
 #endif
     }
     else
@@ -1242,21 +1292,12 @@ static KernelFunction dispatchKernel(const bool                 scLambdasOrAlpha
                                      const bool                 useSimd,
                                      const interaction_const_t& ic)
 {
-    if (ic.softCoreParameters->alphaCoulomb == 0 && ic.softCoreParameters->alphaVdw == 0)
-    {
-        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
-                scLambdasOrAlphasDiffer,
-                vdwInteractionTypeIsEwald,
-                elecInteractionTypeIsEwald,
-                vdwModifierIsPotSwitch,
-                computeForces,
-                useSimd));
-    }
-    else
+    const auto& scParams = *ic.softCoreParameters;
+    if (scParams.softcoreType == SoftcoreType::Beutler)
     {
-        if (ic.softCoreParameters->softcoreType == SoftcoreType::Beutler)
+        if (scParams.alphaCoulomb == 0 && scParams.alphaVdw == 0)
         {
-            return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Beutler>(
+            return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
                     scLambdasOrAlphasDiffer,
                     vdwInteractionTypeIsEwald,
                     elecInteractionTypeIsEwald,
@@ -1264,9 +1305,19 @@ static KernelFunction dispatchKernel(const bool                 scLambdasOrAlpha
                     computeForces,
                     useSimd));
         }
-        else
+        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Beutler>(
+                scLambdasOrAlphasDiffer,
+                vdwInteractionTypeIsEwald,
+                elecInteractionTypeIsEwald,
+                vdwModifierIsPotSwitch,
+                computeForces,
+                useSimd));
+    }
+    else // Gapsys
+    {
+        if (scParams.gapsysScaleLinpointCoul == 0 && scParams.gapsysScaleLinpointVdW == 0)
         {
-            return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Gapsys>(
+            return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::None>(
                     scLambdasOrAlphasDiffer,
                     vdwInteractionTypeIsEwald,
                     elecInteractionTypeIsEwald,
@@ -1274,6 +1325,13 @@ static KernelFunction dispatchKernel(const bool                 scLambdasOrAlpha
                     computeForces,
                     useSimd));
         }
+        return (dispatchKernelOnScLambdasOrAlphasDifference<KernelSoftcoreType::Gapsys>(
+                scLambdasOrAlphasDiffer,
+                vdwInteractionTypeIsEwald,
+                elecInteractionTypeIsEwald,
+                vdwModifierIsPotSwitch,
+                computeForces,
+                useSimd));
     }
 }