2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/domdec/domdec.h"
47 #include "gromacs/ewald/ewald.h"
48 #include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
49 #include "gromacs/legacyheaders/copyrite.h"
50 #include "gromacs/legacyheaders/force.h"
51 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
52 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
53 #include "gromacs/legacyheaders/inputrec.h"
54 #include "gromacs/legacyheaders/macros.h"
55 #include "gromacs/legacyheaders/md_logging.h"
56 #include "gromacs/legacyheaders/md_support.h"
57 #include "gromacs/legacyheaders/names.h"
58 #include "gromacs/legacyheaders/network.h"
59 #include "gromacs/legacyheaders/nonbonded.h"
60 #include "gromacs/legacyheaders/ns.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/tables.h"
63 #include "gromacs/legacyheaders/txtdump.h"
64 #include "gromacs/legacyheaders/typedefs.h"
65 #include "gromacs/legacyheaders/types/commrec.h"
66 #include "gromacs/listed-forces/manage-threading.h"
67 #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
68 #include "gromacs/math/units.h"
69 #include "gromacs/math/utilities.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/mdlib/forcerec-threading.h"
72 #include "gromacs/mdlib/nb_verlet.h"
73 #include "gromacs/mdlib/nbnxn_atomdata.h"
74 #include "gromacs/mdlib/nbnxn_consts.h"
75 #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
76 #include "gromacs/mdlib/nbnxn_search.h"
77 #include "gromacs/mdlib/nbnxn_simd.h"
78 #include "gromacs/pbcutil/ishift.h"
79 #include "gromacs/pbcutil/pbc.h"
80 #include "gromacs/simd/simd.h"
81 #include "gromacs/topology/mtop_util.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/smalloc.h"
85 #include "nbnxn_gpu_jit_support.h"
87 t_forcerec *mk_forcerec(void)
97 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
101 for (i = 0; (i < atnr); i++)
103 for (j = 0; (j < atnr); j++)
105 fprintf(fp, "%2d - %2d", i, j);
108 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
109 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
113 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
114 C12(nbfp, atnr, i, j)/12.0);
121 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
129 snew(nbfp, 3*atnr*atnr);
130 for (i = k = 0; (i < atnr); i++)
132 for (j = 0; (j < atnr); j++, k++)
134 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
135 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
136 /* nbfp now includes the 6.0 derivative prefactor */
137 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
143 snew(nbfp, 2*atnr*atnr);
144 for (i = k = 0; (i < atnr); i++)
146 for (j = 0; (j < atnr); j++, k++)
148 /* nbfp now includes the 6.0/12.0 derivative prefactors */
149 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
150 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
158 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
161 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
163 const real oneOverSix = 1.0 / 6.0;
165 /* For LJ-PME simulations, we correct the energies with the reciprocal space
166 * inside of the cut-off. To do this the non-bonded kernels needs to have
167 * access to the C6-values used on the reciprocal grid in pme.c
171 snew(grid, 2*atnr*atnr);
172 for (i = k = 0; (i < atnr); i++)
174 for (j = 0; (j < atnr); j++, k++)
176 c6i = idef->iparams[i*(atnr+1)].lj.c6;
177 c12i = idef->iparams[i*(atnr+1)].lj.c12;
178 c6j = idef->iparams[j*(atnr+1)].lj.c6;
179 c12j = idef->iparams[j*(atnr+1)].lj.c12;
180 c6 = sqrt(c6i * c6j);
181 if (fr->ljpme_combination_rule == eljpmeLB
182 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
184 sigmai = pow(c12i / c6i, oneOverSix);
185 sigmaj = pow(c12j / c6j, oneOverSix);
186 epsi = c6i * c6i / c12i;
187 epsj = c6j * c6j / c12j;
188 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
190 /* Store the elements at the same relative positions as C6 in nbfp in order
191 * to simplify access in the kernels
193 grid[2*(atnr*i+j)] = c6*6.0;
199 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
203 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
205 const real oneOverSix = 1.0 / 6.0;
208 snew(nbfp, 2*atnr*atnr);
209 for (i = 0; i < atnr; ++i)
211 for (j = 0; j < atnr; ++j)
213 c6i = idef->iparams[i*(atnr+1)].lj.c6;
214 c12i = idef->iparams[i*(atnr+1)].lj.c12;
215 c6j = idef->iparams[j*(atnr+1)].lj.c6;
216 c12j = idef->iparams[j*(atnr+1)].lj.c12;
217 c6 = sqrt(c6i * c6j);
218 c12 = sqrt(c12i * c12j);
219 if (comb_rule == eCOMB_ARITHMETIC
220 && !gmx_numzero(c6) && !gmx_numzero(c12))
222 sigmai = pow(c12i / c6i, oneOverSix);
223 sigmaj = pow(c12j / c6j, oneOverSix);
224 epsi = c6i * c6i / c12i;
225 epsj = c6j * c6j / c12j;
226 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
227 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
229 C6(nbfp, atnr, i, j) = c6*6.0;
230 C12(nbfp, atnr, i, j) = c12*12.0;
236 /* This routine sets fr->solvent_opt to the most common solvent in the
237 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
238 * the fr->solvent_type array with the correct type (or esolNO).
240 * Charge groups that fulfill the conditions but are not identical to the
241 * most common one will be marked as esolNO in the solvent_type array.
243 * TIP3p is identical to SPC for these purposes, so we call it
244 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
246 * NOTE: QM particle should not
247 * become an optimized solvent. Not even if there is only one charge
257 } solvent_parameters_t;
260 check_solvent_cg(const gmx_moltype_t *molt,
263 const unsigned char *qm_grpnr,
264 const t_grps *qm_grps,
266 int *n_solvent_parameters,
267 solvent_parameters_t **solvent_parameters_p,
277 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
278 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
281 solvent_parameters_t *solvent_parameters;
283 /* We use a list with parameters for each solvent type.
284 * Every time we discover a new molecule that fulfills the basic
285 * conditions for a solvent we compare with the previous entries
286 * in these lists. If the parameters are the same we just increment
287 * the counter for that type, and otherwise we create a new type
288 * based on the current molecule.
290 * Once we've finished going through all molecules we check which
291 * solvent is most common, and mark all those molecules while we
292 * clear the flag on all others.
295 solvent_parameters = *solvent_parameters_p;
297 /* Mark the cg first as non optimized */
300 /* Check if this cg has no exclusions with atoms in other charge groups
301 * and all atoms inside the charge group excluded.
302 * We only have 3 or 4 atom solvent loops.
304 if (GET_CGINFO_EXCL_INTER(cginfo) ||
305 !GET_CGINFO_EXCL_INTRA(cginfo))
310 /* Get the indices of the first atom in this charge group */
311 j0 = molt->cgs.index[cg0];
312 j1 = molt->cgs.index[cg0+1];
314 /* Number of atoms in our molecule */
320 "Moltype '%s': there are %d atoms in this charge group\n",
324 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
327 if (nj < 3 || nj > 4)
332 /* Check if we are doing QM on this group */
334 if (qm_grpnr != NULL)
336 for (j = j0; j < j1 && !qm; j++)
338 qm = (qm_grpnr[j] < qm_grps->nr - 1);
341 /* Cannot use solvent optimization with QM */
347 atom = molt->atoms.atom;
349 /* Still looks like a solvent, time to check parameters */
351 /* If it is perturbed (free energy) we can't use the solvent loops,
352 * so then we just skip to the next molecule.
356 for (j = j0; j < j1 && !perturbed; j++)
358 perturbed = PERTURBED(atom[j]);
366 /* Now it's only a question if the VdW and charge parameters
367 * are OK. Before doing the check we compare and see if they are
368 * identical to a possible previous solvent type.
369 * First we assign the current types and charges.
371 for (j = 0; j < nj; j++)
373 tmp_vdwtype[j] = atom[j0+j].type;
374 tmp_charge[j] = atom[j0+j].q;
377 /* Does it match any previous solvent type? */
378 for (k = 0; k < *n_solvent_parameters; k++)
383 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
384 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
385 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
390 /* Check that types & charges match for all atoms in molecule */
391 for (j = 0; j < nj && match == TRUE; j++)
393 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
397 if (tmp_charge[j] != solvent_parameters[k].charge[j])
404 /* Congratulations! We have a matched solvent.
405 * Flag it with this type for later processing.
408 solvent_parameters[k].count += nmol;
410 /* We are done with this charge group */
415 /* If we get here, we have a tentative new solvent type.
416 * Before we add it we must check that it fulfills the requirements
417 * of the solvent optimized loops. First determine which atoms have
420 for (j = 0; j < nj; j++)
423 tjA = tmp_vdwtype[j];
425 /* Go through all other tpes and see if any have non-zero
426 * VdW parameters when combined with this one.
428 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
430 /* We already checked that the atoms weren't perturbed,
431 * so we only need to check state A now.
435 has_vdw[j] = (has_vdw[j] ||
436 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
437 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
438 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
443 has_vdw[j] = (has_vdw[j] ||
444 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
445 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
450 /* Now we know all we need to make the final check and assignment. */
454 * For this we require thatn all atoms have charge,
455 * the charges on atom 2 & 3 should be the same, and only
456 * atom 1 might have VdW.
458 if (has_vdw[1] == FALSE &&
459 has_vdw[2] == FALSE &&
460 tmp_charge[0] != 0 &&
461 tmp_charge[1] != 0 &&
462 tmp_charge[2] == tmp_charge[1])
464 srenew(solvent_parameters, *n_solvent_parameters+1);
465 solvent_parameters[*n_solvent_parameters].model = esolSPC;
466 solvent_parameters[*n_solvent_parameters].count = nmol;
467 for (k = 0; k < 3; k++)
469 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
470 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
473 *cg_sp = *n_solvent_parameters;
474 (*n_solvent_parameters)++;
479 /* Or could it be a TIP4P?
480 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
481 * Only atom 1 mght have VdW.
483 if (has_vdw[1] == FALSE &&
484 has_vdw[2] == FALSE &&
485 has_vdw[3] == FALSE &&
486 tmp_charge[0] == 0 &&
487 tmp_charge[1] != 0 &&
488 tmp_charge[2] == tmp_charge[1] &&
491 srenew(solvent_parameters, *n_solvent_parameters+1);
492 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
493 solvent_parameters[*n_solvent_parameters].count = nmol;
494 for (k = 0; k < 4; k++)
496 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
497 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
500 *cg_sp = *n_solvent_parameters;
501 (*n_solvent_parameters)++;
505 *solvent_parameters_p = solvent_parameters;
509 check_solvent(FILE * fp,
510 const gmx_mtop_t * mtop,
512 cginfo_mb_t *cginfo_mb)
515 const gmx_moltype_t *molt;
516 int mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
517 int n_solvent_parameters;
518 solvent_parameters_t *solvent_parameters;
524 fprintf(debug, "Going to determine what solvent types we have.\n");
527 n_solvent_parameters = 0;
528 solvent_parameters = NULL;
529 /* Allocate temporary array for solvent type */
530 snew(cg_sp, mtop->nmolblock);
533 for (mb = 0; mb < mtop->nmolblock; mb++)
535 molt = &mtop->moltype[mtop->molblock[mb].type];
537 /* Here we have to loop over all individual molecules
538 * because we need to check for QMMM particles.
540 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
541 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
542 nmol = mtop->molblock[mb].nmol/nmol_ch;
543 for (mol = 0; mol < nmol_ch; mol++)
546 am = mol*cgs->index[cgs->nr];
547 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
549 check_solvent_cg(molt, cg_mol, nmol,
550 mtop->groups.grpnr[egcQMMM] ?
551 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
552 &mtop->groups.grps[egcQMMM],
554 &n_solvent_parameters, &solvent_parameters,
555 cginfo_mb[mb].cginfo[cgm+cg_mol],
556 &cg_sp[mb][cgm+cg_mol]);
559 at_offset += cgs->index[cgs->nr];
562 /* Puh! We finished going through all charge groups.
563 * Now find the most common solvent model.
566 /* Most common solvent this far */
568 for (i = 0; i < n_solvent_parameters; i++)
571 solvent_parameters[i].count > solvent_parameters[bestsp].count)
579 bestsol = solvent_parameters[bestsp].model;
587 for (mb = 0; mb < mtop->nmolblock; mb++)
589 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
590 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
591 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
593 if (cg_sp[mb][i] == bestsp)
595 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
600 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
607 if (bestsol != esolNO && fp != NULL)
609 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
611 solvent_parameters[bestsp].count);
614 sfree(solvent_parameters);
615 fr->solvent_opt = bestsol;
619 acNONE = 0, acCONSTRAINT, acSETTLE
622 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
623 t_forcerec *fr, gmx_bool bNoSolvOpt,
624 gmx_bool *bFEP_NonBonded,
625 gmx_bool *bExcl_IntraCGAll_InterCGNone)
628 const t_blocka *excl;
629 const gmx_moltype_t *molt;
630 const gmx_molblock_t *molb;
631 cginfo_mb_t *cginfo_mb;
634 int cg_offset, a_offset;
635 int mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
639 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
641 snew(cginfo_mb, mtop->nmolblock);
643 snew(type_VDW, fr->ntype);
644 for (ai = 0; ai < fr->ntype; ai++)
646 type_VDW[ai] = FALSE;
647 for (j = 0; j < fr->ntype; j++)
649 type_VDW[ai] = type_VDW[ai] ||
651 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
652 C12(fr->nbfp, fr->ntype, ai, j) != 0;
656 *bFEP_NonBonded = FALSE;
657 *bExcl_IntraCGAll_InterCGNone = TRUE;
660 snew(bExcl, excl_nalloc);
663 for (mb = 0; mb < mtop->nmolblock; mb++)
665 molb = &mtop->molblock[mb];
666 molt = &mtop->moltype[molb->type];
670 /* Check if the cginfo is identical for all molecules in this block.
671 * If so, we only need an array of the size of one molecule.
672 * Otherwise we make an array of #mol times #cgs per molecule.
675 for (m = 0; m < molb->nmol; m++)
677 int am = m*cgs->index[cgs->nr];
678 for (cg = 0; cg < cgs->nr; cg++)
681 a1 = cgs->index[cg+1];
682 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
683 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
687 if (mtop->groups.grpnr[egcQMMM] != NULL)
689 for (ai = a0; ai < a1; ai++)
691 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
692 mtop->groups.grpnr[egcQMMM][a_offset +ai])
701 cginfo_mb[mb].cg_start = cg_offset;
702 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
703 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
704 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
705 cginfo = cginfo_mb[mb].cginfo;
707 /* Set constraints flags for constrained atoms */
708 snew(a_con, molt->atoms.nr);
709 for (ftype = 0; ftype < F_NRE; ftype++)
711 if (interaction_function[ftype].flags & IF_CONSTRAINT)
716 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
720 for (a = 0; a < nral; a++)
722 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
723 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
729 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
732 int am = m*cgs->index[cgs->nr];
733 for (cg = 0; cg < cgs->nr; cg++)
736 a1 = cgs->index[cg+1];
738 /* Store the energy group in cginfo */
739 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
740 SET_CGINFO_GID(cginfo[cgm+cg], gid);
742 /* Check the intra/inter charge group exclusions */
743 if (a1-a0 > excl_nalloc)
745 excl_nalloc = a1 - a0;
746 srenew(bExcl, excl_nalloc);
748 /* bExclIntraAll: all intra cg interactions excluded
749 * bExclInter: any inter cg interactions excluded
751 bExclIntraAll = TRUE;
755 bHavePerturbedAtoms = FALSE;
756 for (ai = a0; ai < a1; ai++)
758 /* Check VDW and electrostatic interactions */
759 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
760 type_VDW[molt->atoms.atom[ai].typeB]);
761 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
762 molt->atoms.atom[ai].qB != 0);
764 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
766 /* Clear the exclusion list for atom ai */
767 for (aj = a0; aj < a1; aj++)
769 bExcl[aj-a0] = FALSE;
771 /* Loop over all the exclusions of atom ai */
772 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
775 if (aj < a0 || aj >= a1)
784 /* Check if ai excludes a0 to a1 */
785 for (aj = a0; aj < a1; aj++)
789 bExclIntraAll = FALSE;
796 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
799 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
807 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
811 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
813 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
815 /* The size in cginfo is currently only read with DD */
816 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
820 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
824 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
826 if (bHavePerturbedAtoms && fr->efep != efepNO)
828 SET_CGINFO_FEP(cginfo[cgm+cg]);
829 *bFEP_NonBonded = TRUE;
831 /* Store the charge group size */
832 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
834 if (!bExclIntraAll || bExclInter)
836 *bExcl_IntraCGAll_InterCGNone = FALSE;
843 cg_offset += molb->nmol*cgs->nr;
844 a_offset += molb->nmol*cgs->index[cgs->nr];
848 /* the solvent optimizer is called after the QM is initialized,
849 * because we don't want to have the QM subsystemto become an
853 check_solvent(fplog, mtop, fr, cginfo_mb);
855 if (getenv("GMX_NO_SOLV_OPT"))
859 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
860 "Disabling all solvent optimization\n");
862 fr->solvent_opt = esolNO;
866 fr->solvent_opt = esolNO;
868 if (!fr->solvent_opt)
870 for (mb = 0; mb < mtop->nmolblock; mb++)
872 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
874 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
882 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
887 ncg = cgi_mb[nmb-1].cg_end;
890 for (cg = 0; cg < ncg; cg++)
892 while (cg >= cgi_mb[mb].cg_end)
897 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
903 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
905 /*This now calculates sum for q and c6*/
906 double qsum, q2sum, q, c6sum, c6;
908 const t_atoms *atoms;
913 for (mb = 0; mb < mtop->nmolblock; mb++)
915 nmol = mtop->molblock[mb].nmol;
916 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
917 for (i = 0; i < atoms->nr; i++)
919 q = atoms->atom[i].q;
922 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
927 fr->q2sum[0] = q2sum;
928 fr->c6sum[0] = c6sum;
930 if (fr->efep != efepNO)
935 for (mb = 0; mb < mtop->nmolblock; mb++)
937 nmol = mtop->molblock[mb].nmol;
938 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
939 for (i = 0; i < atoms->nr; i++)
941 q = atoms->atom[i].qB;
944 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
948 fr->q2sum[1] = q2sum;
949 fr->c6sum[1] = c6sum;
954 fr->qsum[1] = fr->qsum[0];
955 fr->q2sum[1] = fr->q2sum[0];
956 fr->c6sum[1] = fr->c6sum[0];
960 if (fr->efep == efepNO)
962 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
966 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
967 fr->qsum[0], fr->qsum[1]);
972 void update_forcerec(t_forcerec *fr, matrix box)
974 if (fr->eeltype == eelGRF)
976 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
977 fr->rcoulomb, fr->temp, fr->zsquare, box,
978 &fr->kappa, &fr->k_rf, &fr->c_rf);
982 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
984 const t_atoms *atoms, *atoms_tpi;
985 const t_blocka *excl;
986 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
987 gmx_int64_t npair, npair_ij, tmpi, tmpj;
988 double csix, ctwelve;
992 real *nbfp_comb = NULL;
998 /* For LJ-PME, we want to correct for the difference between the
999 * actual C6 values and the C6 values used by the LJ-PME based on
1000 * combination rules. */
1002 if (EVDW_PME(fr->vdwtype))
1004 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1005 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1006 for (tpi = 0; tpi < ntp; ++tpi)
1008 for (tpj = 0; tpj < ntp; ++tpj)
1010 C6(nbfp_comb, ntp, tpi, tpj) =
1011 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1012 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1017 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1025 /* Count the types so we avoid natoms^2 operations */
1026 snew(typecount, ntp);
1027 gmx_mtop_count_atomtypes(mtop, q, typecount);
1029 for (tpi = 0; tpi < ntp; tpi++)
1031 for (tpj = tpi; tpj < ntp; tpj++)
1033 tmpi = typecount[tpi];
1034 tmpj = typecount[tpj];
1037 npair_ij = tmpi*tmpj;
1041 npair_ij = tmpi*(tmpi - 1)/2;
1045 /* nbfp now includes the 6.0 derivative prefactor */
1046 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1050 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1051 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1052 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1058 /* Subtract the excluded pairs.
1059 * The main reason for substracting exclusions is that in some cases
1060 * some combinations might never occur and the parameters could have
1061 * any value. These unused values should not influence the dispersion
1064 for (mb = 0; mb < mtop->nmolblock; mb++)
1066 nmol = mtop->molblock[mb].nmol;
1067 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1068 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1069 for (i = 0; (i < atoms->nr); i++)
1073 tpi = atoms->atom[i].type;
1077 tpi = atoms->atom[i].typeB;
1079 j1 = excl->index[i];
1080 j2 = excl->index[i+1];
1081 for (j = j1; j < j2; j++)
1088 tpj = atoms->atom[k].type;
1092 tpj = atoms->atom[k].typeB;
1096 /* nbfp now includes the 6.0 derivative prefactor */
1097 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1101 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1102 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1103 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1113 /* Only correct for the interaction of the test particle
1114 * with the rest of the system.
1117 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1120 for (mb = 0; mb < mtop->nmolblock; mb++)
1122 nmol = mtop->molblock[mb].nmol;
1123 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1124 for (j = 0; j < atoms->nr; j++)
1127 /* Remove the interaction of the test charge group
1130 if (mb == mtop->nmolblock-1)
1134 if (mb == 0 && nmol == 1)
1136 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1141 tpj = atoms->atom[j].type;
1145 tpj = atoms->atom[j].typeB;
1147 for (i = 0; i < fr->n_tpi; i++)
1151 tpi = atoms_tpi->atom[i].type;
1155 tpi = atoms_tpi->atom[i].typeB;
1159 /* nbfp now includes the 6.0 derivative prefactor */
1160 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1164 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1165 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1166 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1173 if (npair - nexcl <= 0 && fplog)
1175 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1181 csix /= npair - nexcl;
1182 ctwelve /= npair - nexcl;
1186 fprintf(debug, "Counted %d exclusions\n", nexcl);
1187 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1188 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1190 fr->avcsix[q] = csix;
1191 fr->avctwelve[q] = ctwelve;
1194 if (EVDW_PME(fr->vdwtype))
1201 if (fr->eDispCorr == edispcAllEner ||
1202 fr->eDispCorr == edispcAllEnerPres)
1204 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1205 fr->avcsix[0], fr->avctwelve[0]);
1209 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1215 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1216 const gmx_mtop_t *mtop)
1218 const t_atoms *at1, *at2;
1219 int mt1, mt2, i, j, tpi, tpj, ntypes;
1225 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1232 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1234 at1 = &mtop->moltype[mt1].atoms;
1235 for (i = 0; (i < at1->nr); i++)
1237 tpi = at1->atom[i].type;
1240 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1243 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1245 at2 = &mtop->moltype[mt2].atoms;
1246 for (j = 0; (j < at2->nr); j++)
1248 tpj = at2->atom[j].type;
1251 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1253 b = BHAMB(nbfp, ntypes, tpi, tpj);
1254 if (b > fr->bham_b_max)
1258 if ((b < bmin) || (bmin == -1))
1268 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1269 bmin, fr->bham_b_max);
1273 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1274 t_forcerec *fr, real rtab,
1275 const t_commrec *cr,
1276 const char *tabfn, char *eg1, char *eg2,
1286 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1291 sprintf(buf, "%s", tabfn);
1294 /* Append the two energy group names */
1295 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1296 eg1, eg2, ftp2ext(efXVG));
1298 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1299 /* Copy the contents of the table to separate coulomb and LJ tables too,
1300 * to improve cache performance.
1302 /* For performance reasons we want
1303 * the table data to be aligned to 16-byte. The pointers could be freed
1304 * but currently aren't.
1306 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1307 nbl->table_elec.format = nbl->table_elec_vdw.format;
1308 nbl->table_elec.r = nbl->table_elec_vdw.r;
1309 nbl->table_elec.n = nbl->table_elec_vdw.n;
1310 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1311 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1312 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1313 nbl->table_elec.ninteractions = 1;
1314 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1315 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1317 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1318 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1319 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1320 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1321 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1322 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1323 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1324 nbl->table_vdw.ninteractions = 2;
1325 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1326 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1328 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1330 for (j = 0; j < 4; j++)
1332 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1334 for (j = 0; j < 8; j++)
1336 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1341 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1342 int *ncount, int **count)
1344 const gmx_moltype_t *molt;
1346 int mt, ftype, stride, i, j, tabnr;
1348 for (mt = 0; mt < mtop->nmoltype; mt++)
1350 molt = &mtop->moltype[mt];
1351 for (ftype = 0; ftype < F_NRE; ftype++)
1353 if (ftype == ftype1 || ftype == ftype2)
1355 il = &molt->ilist[ftype];
1356 stride = 1 + NRAL(ftype);
1357 for (i = 0; i < il->nr; i += stride)
1359 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1362 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1364 if (tabnr >= *ncount)
1366 srenew(*count, tabnr+1);
1367 for (j = *ncount; j < tabnr+1; j++)
1380 static bondedtable_t *make_bonded_tables(FILE *fplog,
1381 int ftype1, int ftype2,
1382 const gmx_mtop_t *mtop,
1383 const char *basefn, const char *tabext)
1385 int i, ncount, *count;
1393 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1398 for (i = 0; i < ncount; i++)
1402 sprintf(tabfn, "%s", basefn);
1403 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1404 tabext, i, ftp2ext(efXVG));
1405 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1414 void forcerec_set_ranges(t_forcerec *fr,
1415 int ncg_home, int ncg_force,
1417 int natoms_force_constr, int natoms_f_novirsum)
1422 /* fr->ncg_force is unused in the standard code,
1423 * but it can be useful for modified code dealing with charge groups.
1425 fr->ncg_force = ncg_force;
1426 fr->natoms_force = natoms_force;
1427 fr->natoms_force_constr = natoms_force_constr;
1429 if (fr->natoms_force_constr > fr->nalloc_force)
1431 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1435 srenew(fr->f_twin, fr->nalloc_force);
1439 if (fr->bF_NoVirSum)
1441 fr->f_novirsum_n = natoms_f_novirsum;
1442 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1444 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1445 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1450 fr->f_novirsum_n = 0;
1454 static real cutoff_inf(real cutoff)
1458 cutoff = GMX_CUTOFF_INF;
1464 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1465 t_forcerec *fr, const t_inputrec *ir,
1466 const char *tabfn, const gmx_mtop_t *mtop,
1474 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1478 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1480 sprintf(buf, "%s", tabfn);
1481 for (i = 0; i < ir->adress->n_tf_grps; i++)
1483 j = ir->adress->tf_table_index[i]; /* get energy group index */
1484 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1485 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1488 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1490 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1495 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1502 ir->rcoulomb == 0 &&
1504 ir->ePBC == epbcNONE &&
1505 ir->vdwtype == evdwCUT &&
1506 ir->coulombtype == eelCUT &&
1507 ir->efep == efepNO &&
1508 (ir->implicit_solvent == eisNO ||
1509 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1510 ir->gb_algorithm == egbHCT ||
1511 ir->gb_algorithm == egbOBC))) &&
1512 getenv("GMX_NO_ALLVSALL") == NULL
1515 if (bAllvsAll && ir->opts.ngener > 1)
1517 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1523 fprintf(stderr, "\n%s\n", note);
1527 fprintf(fp, "\n%s\n", note);
1533 if (bAllvsAll && fp && MASTER(cr))
1535 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1542 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1543 const t_commrec *cr,
1544 const t_inputrec *ir,
1547 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1549 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1550 bGPU ? "GPUs" : "SIMD kernels",
1551 bGPU ? "CPU only" : "plain-C kernels");
1559 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1563 *kernel_type = nbnxnk4x4_PlainC;
1564 *ewald_excl = ewaldexclTable;
1566 #ifdef GMX_NBNXN_SIMD
1568 #ifdef GMX_NBNXN_SIMD_4XN
1569 *kernel_type = nbnxnk4xN_SIMD_4xN;
1571 #ifdef GMX_NBNXN_SIMD_2XNN
1572 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1575 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1576 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1577 * Currently this is based on the SIMD acceleration choice,
1578 * but it might be better to decide this at runtime based on CPU.
1580 * 4xN calculates more (zero) interactions, but has less pair-search
1581 * work and much better kernel instruction scheduling.
1583 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1584 * which doesn't have FMA, both the analytical and tabulated Ewald
1585 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1586 * 2x(4+4) because it results in significantly fewer pairs.
1587 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1588 * 10% with HT, 50% without HT. As we currently don't detect the actual
1589 * use of HT, use 4x8 to avoid a potential performance hit.
1590 * On Intel Haswell 4x8 is always faster.
1592 *kernel_type = nbnxnk4xN_SIMD_4xN;
1594 #ifndef GMX_SIMD_HAVE_FMA
1595 if (EEL_PME_EWALD(ir->coulombtype) ||
1596 EVDW_PME(ir->vdwtype))
1598 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1599 * There are enough instructions to make 2x(4+4) efficient.
1601 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1604 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1607 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1609 #ifdef GMX_NBNXN_SIMD_4XN
1610 *kernel_type = nbnxnk4xN_SIMD_4xN;
1612 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but GROMACS has been compiled without support for these kernels");
1615 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1617 #ifdef GMX_NBNXN_SIMD_2XNN
1618 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1620 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but GROMACS has been compiled without support for these kernels");
1624 /* Analytical Ewald exclusion correction is only an option in
1626 * Since table lookup's don't parallelize with SIMD, analytical
1627 * will probably always be faster for a SIMD width of 8 or more.
1628 * With FMA analytical is sometimes faster for a width if 4 as well.
1629 * On BlueGene/Q, this is faster regardless of precision.
1630 * In single precision, this is faster on Bulldozer.
1632 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1633 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1634 defined GMX_SIMD_IBM_QPX
1635 *ewald_excl = ewaldexclAnalytical;
1637 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1639 *ewald_excl = ewaldexclTable;
1641 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1643 *ewald_excl = ewaldexclAnalytical;
1647 #endif /* GMX_NBNXN_SIMD */
1651 const char *lookup_nbnxn_kernel_name(int kernel_type)
1653 const char *returnvalue = NULL;
1654 switch (kernel_type)
1657 returnvalue = "not set";
1659 case nbnxnk4x4_PlainC:
1660 returnvalue = "plain C";
1662 case nbnxnk4xN_SIMD_4xN:
1663 case nbnxnk4xN_SIMD_2xNN:
1664 #ifdef GMX_NBNXN_SIMD
1665 #if defined GMX_SIMD_X86_SSE2
1666 returnvalue = "SSE2";
1667 #elif defined GMX_SIMD_X86_SSE4_1
1668 returnvalue = "SSE4.1";
1669 #elif defined GMX_SIMD_X86_AVX_128_FMA
1670 returnvalue = "AVX_128_FMA";
1671 #elif defined GMX_SIMD_X86_AVX_256
1672 returnvalue = "AVX_256";
1673 #elif defined GMX_SIMD_X86_AVX2_256
1674 returnvalue = "AVX2_256";
1676 returnvalue = "SIMD";
1678 #else /* GMX_NBNXN_SIMD */
1679 returnvalue = "not available";
1680 #endif /* GMX_NBNXN_SIMD */
1682 case nbnxnk8x8x8_GPU: returnvalue = "GPU"; break;
1683 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1687 gmx_fatal(FARGS, "Illegal kernel type selected");
1694 static void pick_nbnxn_kernel(FILE *fp,
1695 const t_commrec *cr,
1696 gmx_bool use_simd_kernels,
1698 gmx_bool bEmulateGPU,
1699 const t_inputrec *ir,
1702 gmx_bool bDoNonbonded)
1704 assert(kernel_type);
1706 *kernel_type = nbnxnkNotSet;
1707 *ewald_excl = ewaldexclTable;
1711 *kernel_type = nbnxnk8x8x8_PlainC;
1715 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1720 *kernel_type = nbnxnk8x8x8_GPU;
1723 if (*kernel_type == nbnxnkNotSet)
1725 /* LJ PME with LB combination rule does 7 mesh operations.
1726 * This so slow that we don't compile SIMD non-bonded kernels for that.
1728 if (use_simd_kernels &&
1729 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1731 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1735 *kernel_type = nbnxnk4x4_PlainC;
1739 if (bDoNonbonded && fp != NULL)
1741 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1742 lookup_nbnxn_kernel_name(*kernel_type),
1743 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1744 nbnxn_kernel_to_cj_size(*kernel_type));
1746 if (nbnxnk4x4_PlainC == *kernel_type ||
1747 nbnxnk8x8x8_PlainC == *kernel_type)
1749 md_print_warn(cr, fp,
1750 "WARNING: Using the slow %s kernels. This should\n"
1751 "not happen during routine usage on supported platforms.\n\n",
1752 lookup_nbnxn_kernel_name(*kernel_type));
1757 static void pick_nbnxn_resources(FILE *fp,
1758 const t_commrec *cr,
1759 const gmx_hw_info_t *hwinfo,
1760 gmx_bool bDoNonbonded,
1762 gmx_bool *bEmulateGPU,
1763 const gmx_gpu_opt_t *gpu_opt)
1765 gmx_bool bEmulateGPUEnvVarSet;
1766 char gpu_err_str[STRLEN];
1770 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1772 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1773 * GPUs (currently) only handle non-bonded calculations, we will
1774 * automatically switch to emulation if non-bonded calculations are
1775 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1776 * way to turn off GPU initialization, data movement, and cleanup.
1778 * GPU emulation can be useful to assess the performance one can expect by
1779 * adding GPU(s) to the machine. The conditional below allows this even
1780 * if mdrun is compiled without GPU acceleration support.
1781 * Note that you should freezing the system as otherwise it will explode.
1783 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1784 (!bDoNonbonded && gpu_opt->n_dev_use > 0));
1786 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1788 if (gpu_opt->n_dev_use > 0 && !(*bEmulateGPU))
1790 /* Each PP node will use the intra-node id-th device from the
1791 * list of detected/selected GPUs. */
1792 if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
1793 &hwinfo->gpu_info, gpu_opt))
1795 /* At this point the init should never fail as we made sure that
1796 * we have all the GPUs we need. If it still does, we'll bail. */
1797 /* TODO the decorating of gpu_err_str is nicer if it
1798 happens inside init_gpu. Out here, the decorating with
1799 the MPI rank makes sense. */
1800 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1802 get_cuda_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1803 cr->rank_pp_intranode),
1807 /* Here we actually turn on hardware GPU acceleration */
1812 gmx_bool uses_simple_tables(int cutoff_scheme,
1813 nonbonded_verlet_t *nbv,
1816 gmx_bool bUsesSimpleTables = TRUE;
1819 switch (cutoff_scheme)
1822 bUsesSimpleTables = TRUE;
1825 assert(NULL != nbv && NULL != nbv->grp);
1826 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1827 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1830 gmx_incons("unimplemented");
1832 return bUsesSimpleTables;
1835 static void init_ewald_f_table(interaction_const_t *ic,
1836 gmx_bool bUsesSimpleTables,
1841 if (bUsesSimpleTables)
1843 /* Get the Ewald table spacing based on Coulomb and/or LJ
1844 * Ewald coefficients and rtol.
1846 ic->tabq_scale = ewald_spline3_table_scale(ic);
1848 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1849 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1853 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1854 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1855 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1858 sfree_aligned(ic->tabq_coul_FDV0);
1859 sfree_aligned(ic->tabq_coul_F);
1860 sfree_aligned(ic->tabq_coul_V);
1862 sfree_aligned(ic->tabq_vdw_FDV0);
1863 sfree_aligned(ic->tabq_vdw_F);
1864 sfree_aligned(ic->tabq_vdw_V);
1866 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1868 /* Create the original table data in FDV0 */
1869 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1870 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1871 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1872 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1873 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1876 if (EVDW_PME(ic->vdwtype))
1878 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1879 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1880 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1881 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1882 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1886 void init_interaction_const_tables(FILE *fp,
1887 interaction_const_t *ic,
1888 gmx_bool bUsesSimpleTables,
1891 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1893 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1897 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1898 1/ic->tabq_scale, ic->tabq_size);
1903 static void clear_force_switch_constants(shift_consts_t *sc)
1910 static void force_switch_constants(real p,
1914 /* Here we determine the coefficient for shifting the force to zero
1915 * between distance rsw and the cut-off rc.
1916 * For a potential of r^-p, we have force p*r^-(p+1).
1917 * But to save flops we absorb p in the coefficient.
1919 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1920 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1922 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1923 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1924 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1927 static void potential_switch_constants(real rsw, real rc,
1928 switch_consts_t *sc)
1930 /* The switch function is 1 at rsw and 0 at rc.
1931 * The derivative and second derivate are zero at both ends.
1932 * rsw = max(r - r_switch, 0)
1933 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1934 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1935 * force = force*dsw - potential*sw
1938 sc->c3 = -10*pow(rc - rsw, -3);
1939 sc->c4 = 15*pow(rc - rsw, -4);
1940 sc->c5 = -6*pow(rc - rsw, -5);
1943 /*! \brief Construct interaction constants
1945 * This data is used (particularly) by search and force code for
1946 * short-range interactions. Many of these are constant for the whole
1947 * simulation; some are constant only after PME tuning completes.
1950 init_interaction_const(FILE *fp,
1951 interaction_const_t **interaction_const,
1952 const t_forcerec *fr)
1954 interaction_const_t *ic;
1955 const real minusSix = -6.0;
1956 const real minusTwelve = -12.0;
1960 /* Just allocate something so we can free it */
1961 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1962 snew_aligned(ic->tabq_coul_F, 16, 32);
1963 snew_aligned(ic->tabq_coul_V, 16, 32);
1965 ic->rlist = fr->rlist;
1966 ic->rlistlong = fr->rlistlong;
1969 ic->vdwtype = fr->vdwtype;
1970 ic->vdw_modifier = fr->vdw_modifier;
1971 ic->rvdw = fr->rvdw;
1972 ic->rvdw_switch = fr->rvdw_switch;
1973 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1974 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1975 ic->sh_lj_ewald = 0;
1976 clear_force_switch_constants(&ic->dispersion_shift);
1977 clear_force_switch_constants(&ic->repulsion_shift);
1979 switch (ic->vdw_modifier)
1981 case eintmodPOTSHIFT:
1982 /* Only shift the potential, don't touch the force */
1983 ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
1984 ic->repulsion_shift.cpot = -pow(ic->rvdw, minusTwelve);
1985 if (EVDW_PME(ic->vdwtype))
1989 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1990 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
1993 case eintmodFORCESWITCH:
1994 /* Switch the force, switch and shift the potential */
1995 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1996 &ic->dispersion_shift);
1997 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1998 &ic->repulsion_shift);
2000 case eintmodPOTSWITCH:
2001 /* Switch the potential and force */
2002 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2006 case eintmodEXACTCUTOFF:
2007 /* Nothing to do here */
2010 gmx_incons("unimplemented potential modifier");
2013 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2015 /* Electrostatics */
2016 ic->eeltype = fr->eeltype;
2017 ic->coulomb_modifier = fr->coulomb_modifier;
2018 ic->rcoulomb = fr->rcoulomb;
2019 ic->epsilon_r = fr->epsilon_r;
2020 ic->epsfac = fr->epsfac;
2021 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2023 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2025 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2032 /* Reaction-field */
2033 if (EEL_RF(ic->eeltype))
2035 ic->epsilon_rf = fr->epsilon_rf;
2036 ic->k_rf = fr->k_rf;
2037 ic->c_rf = fr->c_rf;
2041 /* For plain cut-off we might use the reaction-field kernels */
2042 ic->epsilon_rf = ic->epsilon_r;
2044 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2046 ic->c_rf = 1/ic->rcoulomb;
2056 real dispersion_shift;
2058 dispersion_shift = ic->dispersion_shift.cpot;
2059 if (EVDW_PME(ic->vdwtype))
2061 dispersion_shift -= ic->sh_lj_ewald;
2063 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2064 ic->repulsion_shift.cpot, dispersion_shift);
2066 if (ic->eeltype == eelCUT)
2068 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2070 else if (EEL_PME(ic->eeltype))
2072 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2077 *interaction_const = ic;
2080 /*! \brief Manage initialization within the NBNXN module of
2081 * run-time constants.
2084 initialize_gpu_constants(const t_commrec gmx_unused *cr,
2085 interaction_const_t *interaction_const,
2086 const struct nonbonded_verlet_t *nbv)
2088 if (nbv != NULL && nbv->bUseGPU)
2090 nbnxn_gpu_init_const(nbv->gpu_nbv, interaction_const, nbv->grp);
2092 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2093 * also sharing texture references. To keep the code simple, we don't
2094 * treat texture references as shared resources, but this means that
2095 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2096 * Hence, to ensure that the non-bonded kernels don't start before all
2097 * texture binding operations are finished, we need to wait for all ranks
2098 * to arrive here before continuing.
2100 * Note that we could omit this barrier if GPUs are not shared (or
2101 * texture objects are used), but as this is initialization code, there
2102 * is no point in complicating things.
2104 #ifdef GMX_THREAD_MPI
2109 #endif /* GMX_THREAD_MPI */
2114 static void init_nb_verlet(FILE *fp,
2115 nonbonded_verlet_t **nb_verlet,
2116 gmx_bool bFEP_NonBonded,
2117 const t_inputrec *ir,
2118 const t_forcerec *fr,
2119 const t_commrec *cr,
2120 const char *nbpu_opt)
2122 nonbonded_verlet_t *nbv;
2125 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2127 nbnxn_alloc_t *nb_alloc;
2128 nbnxn_free_t *nb_free;
2132 pick_nbnxn_resources(fp, cr, fr->hwinfo,
2140 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2141 for (i = 0; i < nbv->ngrp; i++)
2143 nbv->grp[i].nbl_lists.nnbl = 0;
2144 nbv->grp[i].nbat = NULL;
2145 nbv->grp[i].kernel_type = nbnxnkNotSet;
2147 if (i == 0) /* local */
2149 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2150 nbv->bUseGPU, bEmulateGPU, ir,
2151 &nbv->grp[i].kernel_type,
2152 &nbv->grp[i].ewald_excl,
2155 else /* non-local */
2157 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2159 /* Use GPU for local, select a CPU kernel for non-local */
2160 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2162 &nbv->grp[i].kernel_type,
2163 &nbv->grp[i].ewald_excl,
2166 bHybridGPURun = TRUE;
2170 /* Use the same kernel for local and non-local interactions */
2171 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2172 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2179 nbnxn_gpu_compile_kernels(cr->rank_pp_intranode, cr->nodeid, &fr->hwinfo->gpu_info, fr->gpu_opt, fr->ic);
2181 /* init the NxN GPU data; the last argument tells whether we'll have
2182 * both local and non-local NB calculation on GPU */
2183 nbnxn_gpu_init(fp, &nbv->gpu_nbv,
2184 &fr->hwinfo->gpu_info, fr->gpu_opt,
2185 cr->rank_pp_intranode,
2186 (nbv->ngrp > 1) && !bHybridGPURun);
2188 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2192 nbv->min_ci_balanced = strtol(env, &end, 10);
2193 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2195 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2200 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2201 nbv->min_ci_balanced);
2206 nbv->min_ci_balanced = nbnxn_gpu_min_ci_balanced(nbv->gpu_nbv);
2209 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2210 nbv->min_ci_balanced);
2216 nbv->min_ci_balanced = 0;
2221 nbnxn_init_search(&nbv->nbs,
2222 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2223 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2225 gmx_omp_nthreads_get(emntPairsearch));
2227 for (i = 0; i < nbv->ngrp; i++)
2229 gpu_set_host_malloc_and_free(nbv->grp[0].kernel_type == nbnxnk8x8x8_GPU,
2230 &nb_alloc, &nb_free);
2232 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2233 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2234 /* 8x8x8 "non-simple" lists are ATM always combined */
2235 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2239 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2241 gmx_bool bSimpleList;
2242 int enbnxninitcombrule;
2244 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2246 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2248 /* Plain LJ cut-off: we can optimize with combination rules */
2249 enbnxninitcombrule = enbnxninitcombruleDETECT;
2251 else if (fr->vdwtype == evdwPME)
2253 /* LJ-PME: we need to use a combination rule for the grid */
2254 if (fr->ljpme_combination_rule == eljpmeGEOM)
2256 enbnxninitcombrule = enbnxninitcombruleGEOM;
2260 enbnxninitcombrule = enbnxninitcombruleLB;
2265 /* We use a full combination matrix: no rule required */
2266 enbnxninitcombrule = enbnxninitcombruleNONE;
2270 snew(nbv->grp[i].nbat, 1);
2271 nbnxn_atomdata_init(fp,
2273 nbv->grp[i].kernel_type,
2275 fr->ntype, fr->nbfp,
2277 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2282 nbv->grp[i].nbat = nbv->grp[0].nbat;
2287 gmx_bool usingGpu(nonbonded_verlet_t *nbv)
2289 return nbv != NULL && nbv->bUseGPU;
2292 void init_forcerec(FILE *fp,
2293 const output_env_t oenv,
2296 const t_inputrec *ir,
2297 const gmx_mtop_t *mtop,
2298 const t_commrec *cr,
2304 const char *nbpu_opt,
2305 gmx_bool bNoSolvOpt,
2308 int i, m, negp_pp, negptable, egi, egj;
2313 gmx_bool bGenericKernelOnly;
2314 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2315 gmx_bool bFEP_NonBonded;
2316 int *nm_ind, egp_flags;
2318 if (fr->hwinfo == NULL)
2320 /* Detect hardware, gather information.
2321 * In mdrun, hwinfo has already been set before calling init_forcerec.
2322 * Here we ignore GPUs, as tools will not use them anyhow.
2324 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2327 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2328 fr->use_simd_kernels = TRUE;
2330 fr->bDomDec = DOMAINDECOMP(cr);
2332 if (check_box(ir->ePBC, box))
2334 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2337 /* Test particle insertion ? */
2340 /* Set to the size of the molecule to be inserted (the last one) */
2341 /* Because of old style topologies, we have to use the last cg
2342 * instead of the last molecule type.
2344 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2345 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2346 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2348 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2356 /* Copy AdResS parameters */
2359 fr->adress_type = ir->adress->type;
2360 fr->adress_const_wf = ir->adress->const_wf;
2361 fr->adress_ex_width = ir->adress->ex_width;
2362 fr->adress_hy_width = ir->adress->hy_width;
2363 fr->adress_icor = ir->adress->icor;
2364 fr->adress_site = ir->adress->site;
2365 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2366 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2369 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2370 for (i = 0; i < ir->adress->n_energy_grps; i++)
2372 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2375 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2376 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2377 for (i = 0; i < fr->n_adress_tf_grps; i++)
2379 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2381 copy_rvec(ir->adress->refs, fr->adress_refs);
2385 fr->adress_type = eAdressOff;
2386 fr->adress_do_hybridpairs = FALSE;
2389 /* Copy the user determined parameters */
2390 fr->userint1 = ir->userint1;
2391 fr->userint2 = ir->userint2;
2392 fr->userint3 = ir->userint3;
2393 fr->userint4 = ir->userint4;
2394 fr->userreal1 = ir->userreal1;
2395 fr->userreal2 = ir->userreal2;
2396 fr->userreal3 = ir->userreal3;
2397 fr->userreal4 = ir->userreal4;
2400 fr->fc_stepsize = ir->fc_stepsize;
2403 fr->efep = ir->efep;
2404 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2405 if (ir->fepvals->bScCoul)
2407 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2408 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2412 fr->sc_alphacoul = 0;
2413 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2415 fr->sc_power = ir->fepvals->sc_power;
2416 fr->sc_r_power = ir->fepvals->sc_r_power;
2417 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2419 env = getenv("GMX_SCSIGMA_MIN");
2423 sscanf(env, "%20lf", &dbl);
2424 fr->sc_sigma6_min = pow(dbl, 6);
2427 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2431 fr->bNonbonded = TRUE;
2432 if (getenv("GMX_NO_NONBONDED") != NULL)
2434 /* turn off non-bonded calculations */
2435 fr->bNonbonded = FALSE;
2436 md_print_warn(cr, fp,
2437 "Found environment variable GMX_NO_NONBONDED.\n"
2438 "Disabling nonbonded calculations.\n");
2441 bGenericKernelOnly = FALSE;
2443 /* We now check in the NS code whether a particular combination of interactions
2444 * can be used with water optimization, and disable it if that is not the case.
2447 if (getenv("GMX_NB_GENERIC") != NULL)
2452 "Found environment variable GMX_NB_GENERIC.\n"
2453 "Disabling all interaction-specific nonbonded kernels, will only\n"
2454 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2456 bGenericKernelOnly = TRUE;
2459 if (bGenericKernelOnly == TRUE)
2464 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2466 fr->use_simd_kernels = FALSE;
2470 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2471 "Disabling the usage of any SIMD-specific non-bonded & bonded kernel routines\n"
2472 "(e.g. SSE2/SSE4.1/AVX).\n\n");
2476 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2478 /* Check if we can/should do all-vs-all kernels */
2479 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2480 fr->AllvsAll_work = NULL;
2481 fr->AllvsAll_workgb = NULL;
2483 /* All-vs-all kernels have not been implemented in 4.6, and
2484 * the SIMD group kernels are also buggy in this case. Non-SIMD
2485 * group kernels are OK. See Redmine #1249. */
2488 fr->bAllvsAll = FALSE;
2489 fr->use_simd_kernels = FALSE;
2493 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2494 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2495 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2496 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2497 "we are proceeding by disabling all CPU architecture-specific\n"
2498 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2499 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2503 /* Neighbour searching stuff */
2504 fr->cutoff_scheme = ir->cutoff_scheme;
2505 fr->bGrid = (ir->ns_type == ensGRID);
2506 fr->ePBC = ir->ePBC;
2508 if (fr->cutoff_scheme == ecutsGROUP)
2510 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2511 "removed in a future release when 'verlet' supports all interaction forms.\n";
2515 fprintf(stderr, "\n%s\n", note);
2519 fprintf(fp, "\n%s\n", note);
2523 /* Determine if we will do PBC for distances in bonded interactions */
2524 if (fr->ePBC == epbcNONE)
2526 fr->bMolPBC = FALSE;
2530 if (!DOMAINDECOMP(cr))
2532 /* The group cut-off scheme and SHAKE assume charge groups
2533 * are whole, but not using molpbc is faster in most cases.
2535 if (fr->cutoff_scheme == ecutsGROUP ||
2536 (ir->eConstrAlg == econtSHAKE &&
2537 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2538 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2540 fr->bMolPBC = ir->bPeriodicMols;
2545 if (getenv("GMX_USE_GRAPH") != NULL)
2547 fr->bMolPBC = FALSE;
2550 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2557 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2560 fr->bGB = (ir->implicit_solvent == eisGBSA);
2562 fr->rc_scaling = ir->refcoord_scaling;
2563 copy_rvec(ir->posres_com, fr->posres_com);
2564 copy_rvec(ir->posres_comB, fr->posres_comB);
2565 fr->rlist = cutoff_inf(ir->rlist);
2566 fr->rlistlong = cutoff_inf(ir->rlistlong);
2567 fr->eeltype = ir->coulombtype;
2568 fr->vdwtype = ir->vdwtype;
2569 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2571 fr->coulomb_modifier = ir->coulomb_modifier;
2572 fr->vdw_modifier = ir->vdw_modifier;
2574 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2575 switch (fr->eeltype)
2578 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2584 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2588 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2589 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2598 case eelPMEUSERSWITCH:
2599 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2604 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2608 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2612 /* Vdw: Translate from mdp settings to kernel format */
2613 switch (fr->vdwtype)
2618 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2622 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2626 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2632 case evdwENCADSHIFT:
2633 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2637 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2641 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2642 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2643 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2645 fr->rvdw = cutoff_inf(ir->rvdw);
2646 fr->rvdw_switch = ir->rvdw_switch;
2647 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2648 fr->rcoulomb_switch = ir->rcoulomb_switch;
2650 fr->bTwinRange = fr->rlistlong > fr->rlist;
2651 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2653 fr->reppow = mtop->ffparams.reppow;
2655 if (ir->cutoff_scheme == ecutsGROUP)
2657 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2658 && !EVDW_PME(fr->vdwtype));
2659 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2660 fr->bcoultab = !(fr->eeltype == eelCUT ||
2661 fr->eeltype == eelEWALD ||
2662 fr->eeltype == eelPME ||
2663 fr->eeltype == eelRF ||
2664 fr->eeltype == eelRF_ZERO);
2666 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2667 * going to be faster to tabulate the interaction than calling the generic kernel.
2668 * However, if generic kernels have been requested we keep things analytically.
2670 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2671 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2672 bGenericKernelOnly == FALSE)
2674 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2676 fr->bcoultab = TRUE;
2677 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2678 * which would otherwise need two tables.
2682 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2683 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2684 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2685 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2687 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2689 fr->bcoultab = TRUE;
2693 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2695 fr->bcoultab = TRUE;
2697 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2702 if (getenv("GMX_REQUIRE_TABLES"))
2705 fr->bcoultab = TRUE;
2710 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2711 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2714 if (fr->bvdwtab == TRUE)
2716 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2717 fr->nbkernel_vdw_modifier = eintmodNONE;
2719 if (fr->bcoultab == TRUE)
2721 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2722 fr->nbkernel_elec_modifier = eintmodNONE;
2726 if (ir->cutoff_scheme == ecutsVERLET)
2728 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2730 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2732 fr->bvdwtab = FALSE;
2733 fr->bcoultab = FALSE;
2736 /* Tables are used for direct ewald sum */
2739 if (EEL_PME(ir->coulombtype))
2743 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2745 if (ir->coulombtype == eelP3M_AD)
2747 please_cite(fp, "Hockney1988");
2748 please_cite(fp, "Ballenegger2012");
2752 please_cite(fp, "Essmann95a");
2755 if (ir->ewald_geometry == eewg3DC)
2759 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2761 please_cite(fp, "In-Chul99a");
2764 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2765 init_ewald_tab(&(fr->ewald_table), ir, fp);
2768 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2769 1/fr->ewaldcoeff_q);
2773 if (EVDW_PME(ir->vdwtype))
2777 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2779 please_cite(fp, "Essmann95a");
2780 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2783 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2784 1/fr->ewaldcoeff_lj);
2788 /* Electrostatics */
2789 fr->epsilon_r = ir->epsilon_r;
2790 fr->epsilon_rf = ir->epsilon_rf;
2791 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2793 /* Parameters for generalized RF */
2797 if (fr->eeltype == eelGRF)
2799 init_generalized_rf(fp, mtop, ir, fr);
2802 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2803 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2804 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2805 IR_ELEC_FIELD(*ir) ||
2806 (fr->adress_icor != eAdressICOff)
2809 if (fr->cutoff_scheme == ecutsGROUP &&
2810 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2812 /* Count the total number of charge groups */
2813 fr->cg_nalloc = ncg_mtop(mtop);
2814 srenew(fr->cg_cm, fr->cg_nalloc);
2816 if (fr->shift_vec == NULL)
2818 snew(fr->shift_vec, SHIFTS);
2821 if (fr->fshift == NULL)
2823 snew(fr->fshift, SHIFTS);
2826 if (fr->nbfp == NULL)
2828 fr->ntype = mtop->ffparams.atnr;
2829 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2830 if (EVDW_PME(fr->vdwtype))
2832 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2836 /* Copy the energy group exclusions */
2837 fr->egp_flags = ir->opts.egp_flags;
2839 /* Van der Waals stuff */
2840 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2842 if (fr->rvdw_switch >= fr->rvdw)
2844 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2845 fr->rvdw_switch, fr->rvdw);
2849 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2850 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2851 fr->rvdw_switch, fr->rvdw);
2855 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2857 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2860 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2862 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2865 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2867 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2872 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2873 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2876 fr->eDispCorr = ir->eDispCorr;
2877 if (ir->eDispCorr != edispcNO)
2879 set_avcsixtwelve(fp, fr, mtop);
2884 set_bham_b_max(fp, fr, mtop);
2887 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2889 /* Copy the GBSA data (radius, volume and surftens for each
2890 * atomtype) from the topology atomtype section to forcerec.
2892 snew(fr->atype_radius, fr->ntype);
2893 snew(fr->atype_vol, fr->ntype);
2894 snew(fr->atype_surftens, fr->ntype);
2895 snew(fr->atype_gb_radius, fr->ntype);
2896 snew(fr->atype_S_hct, fr->ntype);
2898 if (mtop->atomtypes.nr > 0)
2900 for (i = 0; i < fr->ntype; i++)
2902 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2904 for (i = 0; i < fr->ntype; i++)
2906 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2908 for (i = 0; i < fr->ntype; i++)
2910 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2912 for (i = 0; i < fr->ntype; i++)
2914 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2916 for (i = 0; i < fr->ntype; i++)
2918 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2922 /* Generate the GB table if needed */
2926 fr->gbtabscale = 2000;
2928 fr->gbtabscale = 500;
2932 fr->gbtab = make_gb_table(oenv, fr);
2934 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2936 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2937 if (!DOMAINDECOMP(cr))
2939 make_local_gb(cr, fr->born, ir->gb_algorithm);
2943 /* Set the charge scaling */
2944 if (fr->epsilon_r != 0)
2946 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2950 /* eps = 0 is infinite dieletric: no coulomb interactions */
2954 /* Reaction field constants */
2955 if (EEL_RF(fr->eeltype))
2957 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2958 fr->rcoulomb, fr->temp, fr->zsquare, box,
2959 &fr->kappa, &fr->k_rf, &fr->c_rf);
2962 /*This now calculates sum for q and c6*/
2963 set_chargesum(fp, fr, mtop);
2965 /* if we are using LR electrostatics, and they are tabulated,
2966 * the tables will contain modified coulomb interactions.
2967 * Since we want to use the non-shifted ones for 1-4
2968 * coulombic interactions, we must have an extra set of tables.
2971 /* Construct tables.
2972 * A little unnecessary to make both vdw and coul tables sometimes,
2973 * but what the heck... */
2975 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2976 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2978 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2979 fr->coulomb_modifier != eintmodNONE ||
2980 fr->vdw_modifier != eintmodNONE ||
2981 fr->bBHAM || fr->bEwald) &&
2982 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2983 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2984 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2986 negp_pp = ir->opts.ngener - ir->nwall;
2990 bSomeNormalNbListsAreInUse = TRUE;
2995 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2996 for (egi = 0; egi < negp_pp; egi++)
2998 for (egj = egi; egj < negp_pp; egj++)
3000 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3001 if (!(egp_flags & EGP_EXCL))
3003 if (egp_flags & EGP_TABLE)
3009 bSomeNormalNbListsAreInUse = TRUE;
3014 if (bSomeNormalNbListsAreInUse)
3016 fr->nnblists = negptable + 1;
3020 fr->nnblists = negptable;
3022 if (fr->nnblists > 1)
3024 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3033 snew(fr->nblists, fr->nnblists);
3035 /* This code automatically gives table length tabext without cut-off's,
3036 * in that case grompp should already have checked that we do not need
3037 * normal tables and we only generate tables for 1-4 interactions.
3039 rtab = ir->rlistlong + ir->tabext;
3043 /* make tables for ordinary interactions */
3044 if (bSomeNormalNbListsAreInUse)
3046 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3049 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3051 if (!bMakeSeparate14Table)
3053 fr->tab14 = fr->nblists[0].table_elec_vdw;
3063 /* Read the special tables for certain energy group pairs */
3064 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3065 for (egi = 0; egi < negp_pp; egi++)
3067 for (egj = egi; egj < negp_pp; egj++)
3069 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3070 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3072 if (fr->nnblists > 1)
3074 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3076 /* Read the table file with the two energy groups names appended */
3077 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3078 *mtop->groups.grpname[nm_ind[egi]],
3079 *mtop->groups.grpname[nm_ind[egj]],
3083 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3084 *mtop->groups.grpname[nm_ind[egi]],
3085 *mtop->groups.grpname[nm_ind[egj]],
3086 &fr->nblists[fr->nnblists/2+m]);
3090 else if (fr->nnblists > 1)
3092 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3098 else if ((fr->eDispCorr != edispcNO) &&
3099 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3100 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3101 (fr->vdw_modifier == eintmodPOTSHIFT)))
3103 /* Tables might not be used for the potential modifier interactions per se, but
3104 * we still need them to evaluate switch/shift dispersion corrections in this case.
3106 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3109 if (bMakeSeparate14Table)
3111 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3112 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3113 GMX_MAKETABLES_14ONLY);
3116 /* Read AdResS Thermo Force table if needed */
3117 if (fr->adress_icor == eAdressICThermoForce)
3119 /* old todo replace */
3121 if (ir->adress->n_tf_grps > 0)
3123 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3128 /* load the default table */
3129 snew(fr->atf_tabs, 1);
3130 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3135 fr->nwall = ir->nwall;
3136 if (ir->nwall && ir->wall_type == ewtTABLE)
3138 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3143 fcd->bondtab = make_bonded_tables(fp,
3144 F_TABBONDS, F_TABBONDSNC,
3146 fcd->angletab = make_bonded_tables(fp,
3149 fcd->dihtab = make_bonded_tables(fp,
3157 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3161 /* QM/MM initialization if requested
3165 fprintf(stderr, "QM/MM calculation requested.\n");
3168 fr->bQMMM = ir->bQMMM;
3169 fr->qr = mk_QMMMrec();
3171 /* Set all the static charge group info */
3172 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3174 &fr->bExcl_IntraCGAll_InterCGNone);
3175 if (DOMAINDECOMP(cr))
3181 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3184 if (!DOMAINDECOMP(cr))
3186 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3187 mtop->natoms, mtop->natoms, mtop->natoms);
3190 fr->print_force = print_force;
3193 /* coarse load balancing vars */
3198 /* Initialize neighbor search */
3199 init_ns(fp, cr, &fr->ns, fr, mtop);
3201 if (cr->duty & DUTY_PP)
3203 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3207 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3212 /* Initialize the thread working data for bonded interactions */
3213 init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
3215 snew(fr->excl_load, fr->nthreads+1);
3217 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3218 init_interaction_const(fp, &fr->ic, fr);
3220 if (fr->cutoff_scheme == ecutsVERLET)
3222 if (ir->rcoulomb != ir->rvdw)
3224 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3227 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3230 initialize_gpu_constants(cr, fr->ic, fr->nbv);
3231 init_interaction_const_tables(fp, fr->ic,
3232 uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1),
3235 if (ir->eDispCorr != edispcNO)
3237 calc_enervirdiff(fp, ir->eDispCorr, fr);
3241 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3242 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3243 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3245 void pr_forcerec(FILE *fp, t_forcerec *fr)
3249 pr_real(fp, fr->rlist);
3250 pr_real(fp, fr->rcoulomb);
3251 pr_real(fp, fr->fudgeQQ);
3252 pr_bool(fp, fr->bGrid);
3253 pr_bool(fp, fr->bTwinRange);
3254 /*pr_int(fp,fr->cg0);
3255 pr_int(fp,fr->hcg);*/
3256 for (i = 0; i < fr->nnblists; i++)
3258 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3260 pr_real(fp, fr->rcoulomb_switch);
3261 pr_real(fp, fr->rcoulomb);
3266 void forcerec_set_excl_load(t_forcerec *fr,
3267 const gmx_localtop_t *top)
3270 int t, i, j, ntot, n, ntarget;
3272 ind = top->excls.index;
3276 for (i = 0; i < top->excls.nr; i++)
3278 for (j = ind[i]; j < ind[i+1]; j++)
3287 fr->excl_load[0] = 0;
3290 for (t = 1; t <= fr->nthreads; t++)
3292 ntarget = (ntot*t)/fr->nthreads;
3293 while (i < top->excls.nr && n < ntarget)
3295 for (j = ind[i]; j < ind[i+1]; j++)
3304 fr->excl_load[t] = i;
3308 /* Frees GPU memory and destroys the GPU context.
3310 * Note that this function needs to be called even if GPUs are not used
3311 * in this run because the PME ranks have no knowledge of whether GPUs
3312 * are used or not, but all ranks need to enter the barrier below.
3314 void free_gpu_resources(const t_forcerec *fr,
3315 const t_commrec *cr,
3316 const gmx_gpu_info_t *gpu_info,
3317 const gmx_gpu_opt_t *gpu_opt)
3319 gmx_bool bIsPPrankUsingGPU;
3320 char gpu_err_str[STRLEN];
3322 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3324 if (bIsPPrankUsingGPU)
3326 /* free nbnxn data in GPU memory */
3327 nbnxn_gpu_free(fr->nbv->gpu_nbv);
3329 /* With tMPI we need to wait for all ranks to finish deallocation before
3330 * destroying the context in free_gpu() as some ranks may be sharing
3332 * Note: as only PP ranks need to free GPU resources, so it is safe to
3333 * not call the barrier on PME ranks.
3335 #ifdef GMX_THREAD_MPI
3340 #endif /* GMX_THREAD_MPI */
3342 /* uninitialize GPU (by destroying the context) */
3343 if (!free_cuda_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
3345 gmx_warning("On rank %d failed to free GPU #%d: %s",
3346 cr->nodeid, get_current_cuda_gpu_device_id(), gpu_err_str);