* To help us fund GROMACS development, we humbly ask that you cite
* the research papers on the package. Check out http://www.gromacs.org.
*/
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "gmxpre.h"
+
+#include "config.h"
+#include <assert.h>
#include <math.h>
#include <string.h>
-#include <assert.h>
-#include "sysstuff.h"
-#include "typedefs.h"
-#include "types/commrec.h"
-#include "vec.h"
+
+#include "gromacs/domdec/domdec.h"
+#include "gromacs/ewald/ewald.h"
+#include "gromacs/gmxlib/cuda_tools/pmalloc_cuda.h"
+#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
+#include "gromacs/legacyheaders/copyrite.h"
+#include "gromacs/legacyheaders/force.h"
+#include "gromacs/legacyheaders/gmx_detect_hardware.h"
+#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+#include "gromacs/legacyheaders/inputrec.h"
+#include "gromacs/legacyheaders/macros.h"
+#include "gromacs/legacyheaders/md_logging.h"
+#include "gromacs/legacyheaders/md_support.h"
+#include "gromacs/legacyheaders/names.h"
+#include "gromacs/legacyheaders/network.h"
+#include "gromacs/legacyheaders/nonbonded.h"
+#include "gromacs/legacyheaders/ns.h"
+#include "gromacs/legacyheaders/qmmm.h"
+#include "gromacs/legacyheaders/tables.h"
+#include "gromacs/legacyheaders/txtdump.h"
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
+#include "gromacs/math/calculate-ewald-splitting-coefficient.h"
+#include "gromacs/math/units.h"
#include "gromacs/math/utilities.h"
-#include "macros.h"
-#include "gromacs/utility/smalloc.h"
-#include "macros.h"
-#include "gmx_fatal.h"
-#include "physics.h"
-#include "force.h"
-#include "tables.h"
-#include "nonbonded.h"
-#include "invblock.h"
-#include "names.h"
-#include "network.h"
-#include "pbc.h"
-#include "ns.h"
-#include "mshift.h"
-#include "txtdump.h"
-#include "coulomb.h"
-#include "md_support.h"
-#include "md_logging.h"
-#include "domdec.h"
-#include "qmmm.h"
-#include "copyrite.h"
-#include "mtop_util.h"
-#include "nbnxn_simd.h"
-#include "nbnxn_search.h"
-#include "nbnxn_atomdata.h"
-#include "nbnxn_consts.h"
-#include "gmx_omp_nthreads.h"
-#include "gmx_detect_hardware.h"
-#include "inputrec.h"
+#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/forcerec-threading.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_atomdata.h"
+#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nbnxn_search.h"
+#include "gromacs/mdlib/nbnxn_simd.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
+#include "gromacs/pbcutil/ishift.h"
+#include "gromacs/pbcutil/pbc.h"
#include "gromacs/simd/simd.h"
-
-#include "types/nbnxn_cuda_types_ext.h"
-#include "gpu_utils.h"
-#include "nbnxn_cuda_data_mgmt.h"
-#include "pmalloc_cuda.h"
+#include "gromacs/topology/mtop_util.h"
+#include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/smalloc.h"
t_forcerec *mk_forcerec(void)
{
static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
{
- int i, j, k, atnr;
- real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
- real *grid;
+ int i, j, k, atnr;
+ real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
+ real *grid;
+ const real oneOverSix = 1.0 / 6.0;
/* For LJ-PME simulations, we correct the energies with the reciprocal space
* inside of the cut-off. To do this the non-bonded kernels needs to have
if (fr->ljpme_combination_rule == eljpmeLB
&& !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
{
- sigmai = pow(c12i / c6i, 1.0/6.0);
- sigmaj = pow(c12j / c6j, 1.0/6.0);
+ sigmai = pow(c12i / c6i, oneOverSix);
+ sigmaj = pow(c12j / c6j, oneOverSix);
epsi = c6i * c6i / c12i;
epsj = c6j * c6j / c12j;
c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
{
- real *nbfp;
- int i, j, k, atnr;
- real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
- real c6, c12;
+ real *nbfp;
+ int i, j, atnr;
+ real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
+ real c6, c12;
+ const real oneOverSix = 1.0 / 6.0;
atnr = idef->atnr;
snew(nbfp, 2*atnr*atnr);
if (comb_rule == eCOMB_ARITHMETIC
&& !gmx_numzero(c6) && !gmx_numzero(c12))
{
- sigmai = pow(c12i / c6i, 1.0/6.0);
- sigmaj = pow(c12j / c6j, 1.0/6.0);
+ sigmai = pow(c12i / c6i, oneOverSix);
+ sigmaj = pow(c12j / c6j, oneOverSix);
epsi = c6i * c6i / c12i;
epsj = c6j * c6j / c12j;
c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
int cginfo,
int *cg_sp)
{
- const t_blocka *excl;
t_atom *atom;
int j, k;
int j0, j1, nj;
cginfo_mb_t *cginfo_mb)
{
const t_block * cgs;
- const t_block * mols;
const gmx_moltype_t *molt;
- int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
+ int mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
int n_solvent_parameters;
solvent_parameters_t *solvent_parameters;
int **cg_sp;
fprintf(debug, "Going to determine what solvent types we have.\n");
}
- mols = &mtop->mols;
-
n_solvent_parameters = 0;
solvent_parameters = NULL;
/* Allocate temporary array for solvent type */
snew(cg_sp, mtop->nmolblock);
- cg_offset = 0;
at_offset = 0;
for (mb = 0; mb < mtop->nmolblock; mb++)
{
&cg_sp[mb][cgm+cg_mol]);
}
}
- cg_offset += cgs->nr;
at_offset += cgs->index[cgs->nr];
}
bestsol = esolNO;
}
-#ifdef DISABLE_WATER_NLIST
- bestsol = esolNO;
-#endif
-
fr->nWatMol = 0;
for (mb = 0; mb < mtop->nmolblock; mb++)
{
cginfo_mb_t *cginfo_mb;
gmx_bool *type_VDW;
int *cginfo;
- int cg_offset, a_offset, cgm, am;
- int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
+ int cg_offset, a_offset;
+ int mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
int *a_con;
int ftype;
int ia;
gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
- ncg_tot = ncg_mtop(mtop);
snew(cginfo_mb, mtop->nmolblock);
snew(type_VDW, fr->ntype);
* Otherwise we make an array of #mol times #cgs per molecule.
*/
bId = TRUE;
- am = 0;
for (m = 0; m < molb->nmol; m++)
{
- am = m*cgs->index[cgs->nr];
+ int am = m*cgs->index[cgs->nr];
for (cg = 0; cg < cgs->nr; cg++)
{
a0 = cgs->index[cg];
for (m = 0; m < (bId ? 1 : molb->nmol); m++)
{
- cgm = m*cgs->nr;
- am = m*cgs->index[cgs->nr];
+ int cgm = m*cgs->nr;
+ int am = m*cgs->index[cgs->nr];
for (cg = 0; cg < cgs->nr; cg++)
{
a0 = cgs->index[cg];
{
const t_atoms *atoms, *atoms_tpi;
const t_blocka *excl;
- int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
+ int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
gmx_int64_t npair, npair_ij, tmpi, tmpj;
double csix, ctwelve;
int ntp, *typecount;
}
}
-static void pick_nbnxn_resources(const t_commrec *cr,
+static void pick_nbnxn_resources(FILE *fp,
+ const t_commrec *cr,
const gmx_hw_info_t *hwinfo,
gmx_bool bDoNonbonded,
gmx_bool *bUseGPU,
{
/* Each PP node will use the intra-node id-th device from the
* list of detected/selected GPUs. */
- if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
+ if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
&hwinfo->gpu_info, gpu_opt))
{
/* At this point the init should never fail as we made sure that
gmx_bool bUsesSimpleTables,
real rtab)
{
- real spacing;
-
if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
{
init_ewald_f_table(ic, bUsesSimpleTables, rtab);
{
interaction_const_t *ic;
gmx_bool bUsesSimpleTables = TRUE;
+ const real minusSix = -6.0;
+ const real minusTwelve = -12.0;
snew(ic, 1);
{
case eintmodPOTSHIFT:
/* Only shift the potential, don't touch the force */
- ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
- ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
+ ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
+ ic->repulsion_shift.cpot = -pow(ic->rvdw, minusTwelve);
if (EVDW_PME(ic->vdwtype))
{
real crc2;
crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
- ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
+ ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
}
break;
case eintmodFORCESWITCH:
snew(nbv, 1);
- pick_nbnxn_resources(cr, fr->hwinfo,
+ pick_nbnxn_resources(fp, cr, fr->hwinfo,
fr->bNonbonded,
&nbv->bUseGPU,
&bEmulateGPU,
}
}
+gmx_bool usingGpu(nonbonded_verlet_t *nbv)
+{
+ return nbv != NULL && nbv->bUseGPU;
+}
+
void init_forcerec(FILE *fp,
const output_env_t oenv,
t_forcerec *fr,
gmx_bool bNoSolvOpt,
real print_force)
{
- int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
+ int i, m, negp_pp, negptable, egi, egj;
real rtab;
char *env;
double dbl;
gmx_bool bGenericKernelOnly;
gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
gmx_bool bFEP_NonBonded;
- t_nblists *nbl;
int *nm_ind, egp_flags;
if (fr->hwinfo == NULL)
fr->bDomDec = DOMAINDECOMP(cr);
- natoms = mtop->natoms;
-
if (check_box(ir->ePBC, box))
{
gmx_fatal(FARGS, check_box(ir->ePBC, box));
if (env != NULL)
{
dbl = 0;
- sscanf(env, "%lf", &dbl);
+ sscanf(env, "%20lf", &dbl);
fr->sc_sigma6_min = pow(dbl, 6);
if (fp)
{
egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
{
- nbl = &(fr->nblists[m]);
if (fr->nnblists > 1)
{
fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
fr->excl_load[t] = i;
}
}
+
+/* Frees GPU memory and destroys the CUDA context.
+ *
+ * Note that this function needs to be called even if GPUs are not used
+ * in this run because the PME ranks have no knowledge of whether GPUs
+ * are used or not, but all ranks need to enter the barrier below.
+ */
+void free_gpu_resources(const t_forcerec *fr,
+ const t_commrec *cr,
+ const gmx_gpu_info_t *gpu_info,
+ const gmx_gpu_opt_t *gpu_opt)
+{
+ gmx_bool bIsPPrankUsingGPU;
+ char gpu_err_str[STRLEN];
+
+ bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
+
+ if (bIsPPrankUsingGPU)
+ {
+ /* free nbnxn data in GPU memory */
+ nbnxn_cuda_free(fr->nbv->cu_nbv);
+
+ /* With tMPI we need to wait for all ranks to finish deallocation before
+ * destroying the context in free_gpu() as some ranks may be sharing
+ * GPU and context.
+ * Note: as only PP ranks need to free GPU resources, so it is safe to
+ * not call the barrier on PME ranks.
+ */
+#ifdef GMX_THREAD_MPI
+ if (PAR(cr))
+ {
+ gmx_barrier(cr);
+ }
+#endif /* GMX_THREAD_MPI */
+
+ /* uninitialize GPU (by destroying the context) */
+ if (!free_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
+ {
+ gmx_warning("On rank %d failed to free GPU #%d: %s",
+ cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
+ }
+ }
+}