#include <sys/time.h>
#endif
#include <math.h>
+#include <assert.h>
+
#include "typedefs.h"
#include "string2.h"
#include "smalloc.h"
#include "nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h"
#include "nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h"
#include "nbnxn_kernels/nbnxn_kernel_gpu_ref.h"
+#include "nonbonded.h"
+#include "../gmxlib/nonbonded/nb_kernel.h"
+#include "../gmxlib/nonbonded/nb_free_energy.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/timing/walltime_accounting.h"
#include "adress.h"
#include "qmmm.h"
+#include "gmx_omp_nthreads.h"
+
#include "nbnxn_cuda_data_mgmt.h"
#include "nbnxn_cuda/nbnxn_cuda.h"
}
}
+static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
+ t_forcerec *fr,
+ rvec x[],
+ rvec f[],
+ t_mdatoms *mdatoms,
+ t_lambda *fepvals,
+ real *lambda,
+ gmx_enerdata_t *enerd,
+ int flags,
+ t_nrnb *nrnb,
+ gmx_wallcycle_t wcycle)
+{
+ int donb_flags;
+ nb_kernel_data_t kernel_data;
+ real lam_i[efptNR];
+ real dvdl_nb[efptNR];
+ int th;
+ int i, j;
+
+ donb_flags = 0;
+ /* Add short-range interactions */
+ donb_flags |= GMX_NONBONDED_DO_SR;
+
+ /* Currently all group scheme kernels always calculate (shift-)forces */
+ if (flags & GMX_FORCE_FORCES)
+ {
+ donb_flags |= GMX_NONBONDED_DO_FORCE;
+ }
+ if (flags & GMX_FORCE_VIRIAL)
+ {
+ donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
+ }
+ if (flags & GMX_FORCE_ENERGY)
+ {
+ donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
+ }
+ if (flags & GMX_FORCE_DO_LR)
+ {
+ donb_flags |= GMX_NONBONDED_DO_LR;
+ }
+
+ kernel_data.flags = donb_flags;
+ kernel_data.lambda = lambda;
+ kernel_data.dvdl = dvdl_nb;
+
+ kernel_data.energygrp_elec = enerd->grpp.ener[egCOULSR];
+ kernel_data.energygrp_vdw = enerd->grpp.ener[egLJSR];
+
+ /* reset free energy components */
+ for (i = 0; i < efptNR; i++)
+ {
+ dvdl_nb[i] = 0;
+ }
+
+ assert(gmx_omp_nthreads_get(emntNonbonded) == nbl_lists->nnbl);
+
+ wallcycle_sub_start(wcycle, ewcsNONBONDED);
+#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
+ for (th = 0; th < nbl_lists->nnbl; th++)
+ {
+ gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ x, f, fr, mdatoms, &kernel_data, nrnb);
+ }
+
+ if (fepvals->sc_alpha != 0)
+ {
+ enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+ else
+ {
+ enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
+ enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
+ }
+
+ /* If we do foreign lambda and we have soft-core interactions
+ * we have to recalculate the (non-linear) energies contributions.
+ */
+ if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
+ {
+ kernel_data.flags = (donb_flags & ~(GMX_NONBONDED_DO_FORCE | GMX_NONBONDED_DO_SHIFTFORCE)) | GMX_NONBONDED_DO_FOREIGNLAMBDA;
+ kernel_data.lambda = lam_i;
+ kernel_data.energygrp_elec = enerd->foreign_grpp.ener[egCOULSR];
+ kernel_data.energygrp_vdw = enerd->foreign_grpp.ener[egLJSR];
+ /* Note that we add to kernel_data.dvdl, but ignore the result */
+
+ for (i = 0; i < enerd->n_lambda; i++)
+ {
+ for (j = 0; j < efptNR; j++)
+ {
+ lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
+ }
+ reset_foreign_enerdata(enerd);
+#pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl)
+ for (th = 0; th < nbl_lists->nnbl; th++)
+ {
+ gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th],
+ x, f, fr, mdatoms, &kernel_data, nrnb);
+ }
+
+ sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
+ enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
+ }
+ }
+
+ wallcycle_sub_stop(wcycle, ewcsNONBONDED);
+}
+
void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
t_inputrec *inputrec,
gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
nrnb, wcycle);
}
+ if (fr->efep != efepNO)
+ {
+ /* Calculate the local and non-local free energy interactions here.
+ * Happens here on the CPU both with and without GPU.
+ */
+ if (fr->nbv->grp[eintLocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ {
+ do_nb_verlet_fep(&fr->nbv->grp[eintLocal].nbl_lists,
+ fr, x, f, mdatoms,
+ inputrec->fepvals, lambda,
+ enerd, flags, nrnb, wcycle);
+ }
+
+ if (DOMAINDECOMP(cr) &&
+ fr->nbv->grp[eintNonlocal].nbl_lists.nbl_fep[0]->nrj > 0)
+ {
+ do_nb_verlet_fep(&fr->nbv->grp[eintNonlocal].nbl_lists,
+ fr, x, f, mdatoms,
+ inputrec->fepvals, lambda,
+ enerd, flags, nrnb, wcycle);
+ }
+ }
+
if (!bUseOrEmulGPU || bDiffKernels)
{
int aloc;