#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/fileio/filetypes.h"
-#include "gromacs/gmxlib/md_logging.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nonbonded/nonbonded.h"
#include "gromacs/gpu_utils/gpu_utils.h"
#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
#include "gromacs/mdlib/nbnxn_search.h"
#include "gromacs/mdlib/nbnxn_simd.h"
+#include "gromacs/mdlib/nbnxn_tuning.h"
#include "gromacs/mdlib/nbnxn_util.h"
#include "gromacs/mdlib/ns.h"
#include "gromacs/mdlib/qmmm.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/group.h"
+#include "gromacs/mdtypes/iforceprovider.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/logger.h"
#include "gromacs/utility/pleasecite.h"
#include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/strconvert.h"
#include "nbnxn_gpu_jit_support.h"
const char *egrp_nm[egNR+1] = {
"Coul-SR", "LJ-SR", "Buck-SR",
- "Coul-14", "LJ-14", NULL
+ "Coul-14", "LJ-14", nullptr
};
t_forcerec *mk_forcerec(void)
/* Check if we are doing QM on this group */
qm = FALSE;
- if (qm_grpnr != NULL)
+ if (qm_grpnr != nullptr)
{
for (j = j0; j < j1 && !qm; j++)
{
}
n_solvent_parameters = 0;
- solvent_parameters = NULL;
+ solvent_parameters = nullptr;
/* Allocate temporary array for solvent type */
snew(cg_sp, mtop->nmolblock);
{
check_solvent_cg(molt, cg_mol, nmol,
mtop->groups.grpnr[egcQMMM] ?
- mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
+ mtop->groups.grpnr[egcQMMM]+at_offset+am : nullptr,
&mtop->groups.grps[egcQMMM],
fr,
&n_solvent_parameters, &solvent_parameters,
}
sfree(cg_sp);
- if (bestsol != esolNO && fp != NULL)
+ if (bestsol != esolNO && fp != nullptr)
{
fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
esol_names[bestsol],
{
bId = FALSE;
}
- if (mtop->groups.grpnr[egcQMMM] != NULL)
+ if (mtop->groups.grpnr[egcQMMM] != nullptr)
{
for (ai = a0; ai < a1; ai++)
{
{
if (fr->eeltype == eelGRF)
{
- calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
+ calc_rffac(nullptr, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
fr->rcoulomb, fr->temp, fr->zsquare, box,
&fr->kappa, &fr->k_rf, &fr->c_rf);
}
int ntp, *typecount;
gmx_bool bBHAM;
real *nbfp;
- real *nbfp_comb = NULL;
+ real *nbfp_comb = nullptr;
ntp = fr->ntype;
bBHAM = fr->bBHAM;
sfree(nbfp_comb);
}
- if (fplog != NULL)
+ if (fplog != nullptr)
{
if (fr->eDispCorr == edispcAllEner ||
fr->eDispCorr == edispcAllEnerPres)
char buf[STRLEN];
int i, j;
- if (tabfn == NULL)
+ if (tabfn == nullptr)
{
if (debug)
{
int ncount, *count;
bondedtable_t *tab;
- tab = NULL;
+ tab = nullptr;
ncount = 0;
- count = NULL;
+ count = nullptr;
count_tables(ftype1, ftype2, mtop, &ncount, &count);
// Are there any relevant tabulated bond interactions?
if (fr->bF_NoVirSum)
{
- fr->f_novirsum_n = natoms_f_novirsum;
- if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
- {
- fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
- srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
- }
- }
- else
- {
- fr->f_novirsum_n = 0;
+ /* TODO: remove this + 1 when padding is properly implemented */
+ fr->forceBufferNoVirialSummation->resize(natoms_f_novirsum + 1);
}
}
(ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
ir->gb_algorithm == egbHCT ||
ir->gb_algorithm == egbOBC))) &&
- getenv("GMX_NO_ALLVSALL") == NULL
+ getenv("GMX_NO_ALLVSALL") == nullptr
);
if (bAllvsAll && ir->opts.ngener > 1)
if (bPrintNote)
{
- if (MASTER(cr))
- {
- fprintf(stderr, "\n%s\n", note);
- }
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp, "\n%s\n", note);
}
}
-gmx_bool nbnxn_gpu_acceleration_supported(FILE *fplog,
- const t_commrec *cr,
- const t_inputrec *ir,
- gmx_bool bRerunMD)
-{
- if (bRerunMD && ir->opts.ngener > 1)
- {
- /* Rerun execution time is dominated by I/O and pair search,
- * so GPUs are not very useful, plus they do not support more
- * than one energy group. If the user requested GPUs
- * explicitly, a fatal error is given later. With non-reruns,
- * we fall back to a single whole-of system energy group
- * (which runs much faster than a multiple-energy-groups
- * implementation would), and issue a note in the .log
- * file. Users can re-run if they want the information. */
- md_print_warn(cr, fplog, "Rerun with energy groups is not implemented for GPUs, falling back to the CPU\n");
- return FALSE;
- }
-
- return TRUE;
-}
-
-gmx_bool nbnxn_simd_supported(FILE *fplog,
- const t_commrec *cr,
- const t_inputrec *ir)
+gmx_bool nbnxn_simd_supported(const gmx::MDLogger &mdlog,
+ const t_inputrec *ir)
{
if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
{
/* LJ PME with LB combination rule does 7 mesh operations.
* This so slow that we don't compile SIMD non-bonded kernels
* for that. */
- md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with SIMD kernels, falling back to plain C kernels\n");
+ GMX_LOG(mdlog.warning).asParagraph().appendText("LJ-PME with Lorentz-Berthelot is not supported with SIMD kernels, falling back to plain C kernels");
return FALSE;
}
#endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
- if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
+ if (getenv("GMX_NBNXN_SIMD_4XN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_4XN
*kernel_type = nbnxnk4xN_SIMD_4xN;
gmx_fatal(FARGS, "SIMD 4xN kernels requested, but GROMACS has been compiled without support for these kernels");
#endif
}
- if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
+ if (getenv("GMX_NBNXN_SIMD_2XNN") != nullptr)
{
#ifdef GMX_NBNXN_SIMD_2XNN
*kernel_type = nbnxnk4xN_SIMD_2xNN;
* With FMA analytical is sometimes faster for a width if 4 as well.
* On BlueGene/Q, this is faster regardless of precision.
* In single precision, this is faster on Bulldozer.
+ * On Skylake table is faster in single and double. TODO: Test 5xxx series.
*/
-#if GMX_SIMD_REAL_WIDTH >= 8 || \
- (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE) || GMX_SIMD_IBM_QPX
+#if ((GMX_SIMD_REAL_WIDTH >= 8 || (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE)) \
+ && !GMX_SIMD_X86_AVX_512) || GMX_SIMD_IBM_QPX
*ewald_excl = ewaldexclAnalytical;
#endif
- if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
+ if (getenv("GMX_NBNXN_EWALD_TABLE") != nullptr)
{
*ewald_excl = ewaldexclTable;
}
- if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
+ if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != nullptr)
{
*ewald_excl = ewaldexclAnalytical;
}
const char *lookup_nbnxn_kernel_name(int kernel_type)
{
- const char *returnvalue = NULL;
+ const char *returnvalue = nullptr;
switch (kernel_type)
{
case nbnxnkNotSet:
case nbnxnkNR:
default:
gmx_fatal(FARGS, "Illegal kernel type selected");
- returnvalue = NULL;
+ returnvalue = nullptr;
break;
}
return returnvalue;
};
static void pick_nbnxn_kernel(FILE *fp,
- const t_commrec *cr,
+ const gmx::MDLogger &mdlog,
gmx_bool use_simd_kernels,
gmx_bool bUseGPU,
- gmx_bool bEmulateGPU,
+ bool emulateGpu,
const t_inputrec *ir,
int *kernel_type,
int *ewald_excl,
*kernel_type = nbnxnkNotSet;
*ewald_excl = ewaldexclTable;
- if (bEmulateGPU)
+ if (emulateGpu)
{
*kernel_type = nbnxnk8x8x8_PlainC;
if (bDoNonbonded)
{
- md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
+ GMX_LOG(mdlog.warning).asParagraph().appendText("Emulating a GPU run on the CPU (slow)");
}
}
else if (bUseGPU)
if (*kernel_type == nbnxnkNotSet)
{
if (use_simd_kernels &&
- nbnxn_simd_supported(fp, cr, ir))
+ nbnxn_simd_supported(mdlog, ir))
{
pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
}
}
}
- if (bDoNonbonded && fp != NULL)
+ if (bDoNonbonded && fp != nullptr)
{
fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
lookup_nbnxn_kernel_name(*kernel_type),
if (nbnxnk4x4_PlainC == *kernel_type ||
nbnxnk8x8x8_PlainC == *kernel_type)
{
- md_print_warn(cr, fp,
- "WARNING: Using the slow %s kernels. This should\n"
- "not happen during routine usage on supported platforms.\n\n",
- lookup_nbnxn_kernel_name(*kernel_type));
+ GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
+ "WARNING: Using the slow %s kernels. This should\n"
+ "not happen during routine usage on supported platforms.",
+ lookup_nbnxn_kernel_name(*kernel_type));
}
}
}
-static void pick_nbnxn_resources(FILE *fp,
- const t_commrec *cr,
- const gmx_hw_info_t *hwinfo,
- gmx_bool bDoNonbonded,
- gmx_bool *bUseGPU,
- gmx_bool *bEmulateGPU,
- const gmx_gpu_opt_t *gpu_opt)
-{
- gmx_bool bEmulateGPUEnvVarSet;
- char gpu_err_str[STRLEN];
-
- *bUseGPU = FALSE;
-
- bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
-
- /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
- * GPUs (currently) only handle non-bonded calculations, we will
- * automatically switch to emulation if non-bonded calculations are
- * turned off via GMX_NO_NONBONDED - this is the simple and elegant
- * way to turn off GPU initialization, data movement, and cleanup.
- *
- * GPU emulation can be useful to assess the performance one can expect by
- * adding GPU(s) to the machine. The conditional below allows this even
- * if mdrun is compiled without GPU acceleration support.
- * Note that you should freezing the system as otherwise it will explode.
- */
- *bEmulateGPU = (bEmulateGPUEnvVarSet ||
- (!bDoNonbonded && gpu_opt->n_dev_use > 0));
-
- /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
- */
- if (gpu_opt->n_dev_use > 0 && !(*bEmulateGPU))
- {
- /* Each PP node will use the intra-node id-th device from the
- * list of detected/selected GPUs. */
- if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
- &hwinfo->gpu_info, gpu_opt))
- {
- /* At this point the init should never fail as we made sure that
- * we have all the GPUs we need. If it still does, we'll bail. */
- /* TODO the decorating of gpu_err_str is nicer if it
- happens inside init_gpu. Out here, the decorating with
- the MPI rank makes sense. */
- gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
- cr->nodeid,
- get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
- cr->rank_pp_intranode),
- gpu_err_str);
- }
-
- /* Here we actually turn on hardware GPU acceleration */
- *bUseGPU = TRUE;
- }
-}
-
gmx_bool uses_simple_tables(int cutoff_scheme,
nonbonded_verlet_t *nbv,
int group)
{
init_ewald_f_table(ic, rtab);
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1/ic->tabq_scale, ic->tabq_size);
snew_aligned(ic->tabq_coul_F, 16, 32);
snew_aligned(ic->tabq_coul_V, 16, 32);
- ic->rlist = fr->rlist;
-
/* Lennard-Jones */
ic->vdwtype = fr->vdwtype;
ic->vdw_modifier = fr->vdw_modifier;
}
}
- if (fp != NULL)
+ if (fp != nullptr)
{
real dispersion_shift;
*interaction_const = ic;
}
+/* TODO deviceInfo should be logically const, but currently
+ * init_gpu modifies it to set up NVML support. This could
+ * happen during the detection phase, and deviceInfo could
+ * then become const. */
static void init_nb_verlet(FILE *fp,
+ const gmx::MDLogger &mdlog,
nonbonded_verlet_t **nb_verlet,
gmx_bool bFEP_NonBonded,
const t_inputrec *ir,
const t_forcerec *fr,
const t_commrec *cr,
- const char *nbpu_opt)
+ const char *nbpu_opt,
+ gmx_device_info_t *deviceInfo,
+ const gmx_mtop_t *mtop,
+ matrix box)
{
nonbonded_verlet_t *nbv;
int i;
char *env;
- gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
+ gmx_bool bHybridGPURun = FALSE;
nbnxn_alloc_t *nb_alloc;
nbnxn_free_t *nb_free;
- snew(nbv, 1);
+ nbv = new nonbonded_verlet_t();
- pick_nbnxn_resources(fp, cr, fr->hwinfo,
- fr->bNonbonded,
- &nbv->bUseGPU,
- &bEmulateGPU,
- fr->gpu_opt);
+ nbv->emulateGpu = (getenv("GMX_EMULATE_GPU") != nullptr);
+ nbv->bUseGPU = deviceInfo != nullptr;
- nbv->nbs = NULL;
+ GMX_RELEASE_ASSERT(!(nbv->emulateGpu && nbv->bUseGPU), "When GPU emulation is active, there cannot be a GPU assignment");
+
+ if (nbv->bUseGPU)
+ {
+ /* Use the assigned GPU. */
+ init_gpu(mdlog, cr->nodeid, deviceInfo);
+ }
+
+ nbv->nbs = nullptr;
nbv->min_ci_balanced = 0;
nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
for (i = 0; i < nbv->ngrp; i++)
{
nbv->grp[i].nbl_lists.nnbl = 0;
- nbv->grp[i].nbat = NULL;
+ nbv->grp[i].nbat = nullptr;
nbv->grp[i].kernel_type = nbnxnkNotSet;
if (i == 0) /* local */
{
- pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
- nbv->bUseGPU, bEmulateGPU, ir,
+ pick_nbnxn_kernel(fp, mdlog, fr->use_simd_kernels,
+ nbv->bUseGPU, nbv->emulateGpu, ir,
&nbv->grp[i].kernel_type,
&nbv->grp[i].ewald_excl,
fr->bNonbonded);
}
else /* non-local */
{
- if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
+ if (nbpu_opt != nullptr && strcmp(nbpu_opt, "gpu_cpu") == 0)
{
/* Use GPU for local, select a CPU kernel for non-local */
- pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
- FALSE, FALSE, ir,
+ pick_nbnxn_kernel(fp, mdlog, fr->use_simd_kernels,
+ FALSE, false, ir,
&nbv->grp[i].kernel_type,
&nbv->grp[i].ewald_excl,
fr->bNonbonded);
}
}
+ nbv->listParams = std::unique_ptr<NbnxnListParameters>(new NbnxnListParameters(ir->rlist));
+ setupDynamicPairlistPruning(fp, ir, mtop, box, nbv->bUseGPU, fr->ic,
+ nbv->listParams.get());
+
nbnxn_init_search(&nbv->nbs,
- DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
- DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
+ DOMAINDECOMP(cr) ? &cr->dd->nc : nullptr,
+ DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : nullptr,
bFEP_NonBonded,
gmx_omp_nthreads_get(emntPairsearch));
if (fr->vdwtype == evdwCUT &&
(fr->vdw_modifier == eintmodNONE ||
fr->vdw_modifier == eintmodPOTSHIFT) &&
- getenv("GMX_NO_LJ_COMB_RULE") == NULL)
+ getenv("GMX_NO_LJ_COMB_RULE") == nullptr)
{
/* Plain LJ cut-off: we can optimize with combination rules */
enbnxninitcombrule = enbnxninitcombruleDETECT;
/* init the NxN GPU data; the last argument tells whether we'll have
* both local and non-local NB calculation on GPU */
nbnxn_gpu_init(&nbv->gpu_nbv,
- &fr->hwinfo->gpu_info,
- fr->gpu_opt,
+ deviceInfo,
fr->ic,
+ nbv->listParams.get(),
nbv->grp,
- cr->rank_pp_intranode,
cr->nodeid,
(nbv->ngrp > 1) && !bHybridGPURun);
}
#endif /* GMX_THREAD_MPI */
- if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
+ if ((env = getenv("GMX_NB_MIN_CI")) != nullptr)
{
char *end;
gmx_bool usingGpu(nonbonded_verlet_t *nbv)
{
- return nbv != NULL && nbv->bUseGPU;
+ return nbv != nullptr && nbv->bUseGPU;
}
-void init_forcerec(FILE *fp,
- t_forcerec *fr,
- t_fcdata *fcd,
- const t_inputrec *ir,
- const gmx_mtop_t *mtop,
- const t_commrec *cr,
- matrix box,
- const char *tabfn,
- const char *tabpfn,
- const t_filenm *tabbfnm,
- const char *nbpu_opt,
- gmx_bool bNoSolvOpt,
- real print_force)
+void init_forcerec(FILE *fp,
+ const gmx::MDLogger &mdlog,
+ t_forcerec *fr,
+ t_fcdata *fcd,
+ const t_inputrec *ir,
+ const gmx_mtop_t *mtop,
+ const t_commrec *cr,
+ matrix box,
+ const char *tabfn,
+ const char *tabpfn,
+ const t_filenm *tabbfnm,
+ const char *nbpu_opt,
+ gmx_device_info_t *deviceInfo,
+ gmx_bool bNoSolvOpt,
+ real print_force)
{
int i, m, negp_pp, negptable, egi, egj;
real rtab;
gmx_bool bFEP_NonBonded;
int *nm_ind, egp_flags;
- if (fr->hwinfo == NULL)
- {
- /* Detect hardware, gather information.
- * In mdrun, hwinfo has already been set before calling init_forcerec.
- * Here we ignore GPUs, as tools will not use them anyhow.
- */
- fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
- }
-
/* By default we turn SIMD kernels on, but it might be turned off further down... */
fr->use_simd_kernels = TRUE;
fr->sc_sigma6_def = gmx::power6(ir->fepvals->sc_sigma);
env = getenv("GMX_SCSIGMA_MIN");
- if (env != NULL)
+ if (env != nullptr)
{
dbl = 0;
sscanf(env, "%20lf", &dbl);
}
fr->bNonbonded = TRUE;
- if (getenv("GMX_NO_NONBONDED") != NULL)
+ if (getenv("GMX_NO_NONBONDED") != nullptr)
{
/* turn off non-bonded calculations */
fr->bNonbonded = FALSE;
- md_print_warn(cr, fp,
- "Found environment variable GMX_NO_NONBONDED.\n"
- "Disabling nonbonded calculations.\n");
+ GMX_LOG(mdlog.warning).asParagraph().appendText(
+ "Found environment variable GMX_NO_NONBONDED.\n"
+ "Disabling nonbonded calculations.");
}
bGenericKernelOnly = FALSE;
* can be used with water optimization, and disable it if that is not the case.
*/
- if (getenv("GMX_NB_GENERIC") != NULL)
+ if (getenv("GMX_NB_GENERIC") != nullptr)
{
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp,
"Found environment variable GMX_NB_GENERIC.\n"
bNoSolvOpt = TRUE;
}
- if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
+ if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != nullptr) || (getenv("GMX_NOOPTIMIZEDKERNELS") != nullptr) )
{
fr->use_simd_kernels = FALSE;
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp,
"\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
/* Check if we can/should do all-vs-all kernels */
- fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
- fr->AllvsAll_work = NULL;
- fr->AllvsAll_workgb = NULL;
+ fr->bAllvsAll = can_use_allvsall(ir, FALSE, nullptr, nullptr);
+ fr->AllvsAll_work = nullptr;
+ fr->AllvsAll_workgb = nullptr;
/* All-vs-all kernels have not been implemented in 4.6 and later.
* See Redmine #1249. */
if (fr->bAllvsAll)
{
fr->bAllvsAll = FALSE;
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp,
"\nYour simulation settings would have triggered the efficient all-vs-all\n"
{
fprintf(stderr, "\n%s\n", note);
}
- if (fp != NULL)
+ if (fp != nullptr)
{
fprintf(fp, "\n%s\n", note);
}
}
else
{
- fr->bMolPBC = TRUE;
+ /* Not making molecules whole is faster in most cases,
+ * but with orientation restraints we need whole molecules.
+ */
+ fr->bMolPBC = (fcd->orires.nr == 0);
- if (getenv("GMX_USE_GRAPH") != NULL)
+ if (getenv("GMX_USE_GRAPH") != nullptr)
{
fr->bMolPBC = FALSE;
if (fp)
{
- md_print_warn(cr, fp, "GMX_USE_GRAPH is set, using the graph for bonded interactions\n");
+ GMX_LOG(mdlog.warning).asParagraph().appendText("GMX_USE_GRAPH is set, using the graph for bonded interactions");
}
if (mtop->bIntermolecularInteractions)
{
- md_print_warn(cr, fp, "WARNING: Molecules linked by intermolecular interactions have to reside in the same periodic image, otherwise artifacts will occur!\n");
+ GMX_LOG(mdlog.warning).asParagraph().appendText("WARNING: Molecules linked by intermolecular interactions have to reside in the same periodic image, otherwise artifacts will occur!");
}
}
+ GMX_RELEASE_ASSERT(fr->bMolPBC || !mtop->bIntermolecularInteractions, "We need to use PBC within molecules with inter-molecular interactions");
+
if (bSHAKE && fr->bMolPBC)
{
gmx_fatal(FARGS, "SHAKE is not properly supported with intermolecular interactions. For short simulations where linked molecules remain in the same periodic image, the environment variable GMX_USE_GRAPH can be used to override this check.\n");
fr->bcoultab = FALSE;
}
+ /* This now calculates sum for q and C6 */
+ set_chargesum(fp, fr, mtop);
+
/* Tables are used for direct ewald sum */
if (fr->bEwald)
{
if (ir->ewald_geometry == eewg3DC)
{
+ bool haveNetCharge = (fabs(fr->qsum[0]) > 1e-4 ||
+ fabs(fr->qsum[1]) > 1e-4);
if (fp)
{
- fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
+ fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry%s.\n",
+ haveNetCharge ? " and net charge" : "");
}
please_cite(fp, "In-Chul99a");
+ if (haveNetCharge)
+ {
+ please_cite(fp, "Ballenegger2009");
+ }
}
}
fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
}
fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
+ fr->forceProviders->hasForcesWithoutVirialContribution() ||
gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
- gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
- inputrecElecField(ir)
- );
+ gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0);
+
+ if (fr->bF_NoVirSum)
+ {
+ fr->forceBufferNoVirialSummation = new PaddedRVecVector;
+ }
if (fr->cutoff_scheme == ecutsGROUP &&
ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
fr->cg_nalloc = ncg_mtop(mtop);
srenew(fr->cg_cm, fr->cg_nalloc);
}
- if (fr->shift_vec == NULL)
+ if (fr->shift_vec == nullptr)
{
snew(fr->shift_vec, SHIFTS);
}
- if (fr->fshift == NULL)
+ if (fr->fshift == nullptr)
{
snew(fr->fshift, SHIFTS);
}
- if (fr->nbfp == NULL)
+ if (fr->nbfp == nullptr)
{
fr->ntype = mtop->ffparams.atnr;
fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
&fr->kappa, &fr->k_rf, &fr->c_rf);
}
- /*This now calculates sum for q and c6*/
- set_chargesum(fp, fr, mtop);
-
/* Construct tables for the group scheme. A little unnecessary to
* make both vdw and coul tables sometimes, but what the
* heck. Note that both cutoff schemes construct Ewald tables in
/* make tables for ordinary interactions */
if (bSomeNormalNbListsAreInUse)
{
- make_nbf_tables(fp, fr, rtab, tabfn, NULL, NULL, &fr->nblists[0]);
+ make_nbf_tables(fp, fr, rtab, tabfn, nullptr, nullptr, &fr->nblists[0]);
m = 1;
}
else
&fr->bExcl_IntraCGAll_InterCGNone);
if (DOMAINDECOMP(cr))
{
- fr->cginfo = NULL;
+ fr->cginfo = nullptr;
}
else
{
GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical");
}
- init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
+ init_nb_verlet(fp, mdlog, &fr->nbv, bFEP_NonBonded, ir, fr,
+ cr, nbpu_opt, deviceInfo,
+ mtop, box);
}
if (ir->eDispCorr != edispcNO)
* in this run because the PME ranks have no knowledge of whether GPUs
* are used or not, but all ranks need to enter the barrier below.
*/
-void free_gpu_resources(const t_forcerec *fr,
- const t_commrec *cr,
- const gmx_gpu_info_t *gpu_info,
- const gmx_gpu_opt_t *gpu_opt)
+void free_gpu_resources(const t_forcerec *fr,
+ const t_commrec *cr,
+ const gmx_device_info_t *deviceInfo)
{
gmx_bool bIsPPrankUsingGPU;
char gpu_err_str[STRLEN];
nbnxn_gpu_free(fr->nbv->gpu_nbv);
/* stop the GPU profiler (only CUDA) */
stopGpuProfiler();
+ }
- /* With tMPI we need to wait for all ranks to finish deallocation before
- * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
- * GPU and context.
- *
- * This is not a concern in OpenCL where we use one context per rank which
- * is freed in nbnxn_gpu_free().
- *
- * Note: as only PP ranks need to free GPU resources, so it is safe to
- * not call the barrier on PME ranks.
- */
+ /* With tMPI we need to wait for all ranks to finish deallocation before
+ * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
+ * GPU and context.
+ *
+ * This is not a concern in OpenCL where we use one context per rank which
+ * is freed in nbnxn_gpu_free().
+ *
+ * Note: it is safe to not call the barrier on the ranks which do not use GPU,
+ * but it is easier and more futureproof to call it on the whole node.
+ */
#if GMX_THREAD_MPI
- if (PAR(cr))
- {
- gmx_barrier(cr);
- }
+ if (PAR(cr) || MULTISIM(cr))
+ {
+ gmx_barrier_physical_node(cr);
+ }
#endif /* GMX_THREAD_MPI */
+ if (bIsPPrankUsingGPU)
+ {
/* uninitialize GPU (by destroying the context) */
- if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
+ if (!free_cuda_gpu(deviceInfo, gpu_err_str))
{
gmx_warning("On rank %d failed to free GPU #%d: %s",
cr->nodeid, get_current_cuda_gpu_device_id(), gpu_err_str);