2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/legacyheaders/typedefs.h"
44 #include "gromacs/legacyheaders/types/commrec.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/math/utilities.h"
47 #include "gromacs/legacyheaders/macros.h"
48 #include "gromacs/legacyheaders/macros.h"
49 #include "gromacs/math/units.h"
50 #include "gromacs/legacyheaders/force.h"
51 #include "gromacs/legacyheaders/tables.h"
52 #include "gromacs/legacyheaders/nonbonded.h"
53 #include "gromacs/legacyheaders/names.h"
54 #include "gromacs/legacyheaders/network.h"
55 #include "gromacs/legacyheaders/ns.h"
56 #include "gromacs/legacyheaders/txtdump.h"
57 #include "gromacs/legacyheaders/coulomb.h"
58 #include "gromacs/legacyheaders/md_support.h"
59 #include "gromacs/legacyheaders/md_logging.h"
60 #include "gromacs/legacyheaders/domdec.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/copyrite.h"
63 #include "gromacs/topology/mtop_util.h"
64 #include "nbnxn_simd.h"
65 #include "nbnxn_search.h"
66 #include "nbnxn_atomdata.h"
67 #include "nbnxn_consts.h"
68 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
69 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
70 #include "gromacs/legacyheaders/inputrec.h"
72 #include "gromacs/pbcutil/ishift.h"
73 #include "gromacs/pbcutil/pbc.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
78 #include "gromacs/legacyheaders/gpu_utils.h"
79 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
80 #include "gromacs/legacyheaders/pmalloc_cuda.h"
81 #include "nb_verlet.h"
83 t_forcerec *mk_forcerec(void)
93 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
97 for (i = 0; (i < atnr); i++)
99 for (j = 0; (j < atnr); j++)
101 fprintf(fp, "%2d - %2d", i, j);
104 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
105 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
109 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
110 C12(nbfp, atnr, i, j)/12.0);
117 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
125 snew(nbfp, 3*atnr*atnr);
126 for (i = k = 0; (i < atnr); i++)
128 for (j = 0; (j < atnr); j++, k++)
130 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
131 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
132 /* nbfp now includes the 6.0 derivative prefactor */
133 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
139 snew(nbfp, 2*atnr*atnr);
140 for (i = k = 0; (i < atnr); i++)
142 for (j = 0; (j < atnr); j++, k++)
144 /* nbfp now includes the 6.0/12.0 derivative prefactors */
145 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
146 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
154 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
157 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
160 /* For LJ-PME simulations, we correct the energies with the reciprocal space
161 * inside of the cut-off. To do this the non-bonded kernels needs to have
162 * access to the C6-values used on the reciprocal grid in pme.c
166 snew(grid, 2*atnr*atnr);
167 for (i = k = 0; (i < atnr); i++)
169 for (j = 0; (j < atnr); j++, k++)
171 c6i = idef->iparams[i*(atnr+1)].lj.c6;
172 c12i = idef->iparams[i*(atnr+1)].lj.c12;
173 c6j = idef->iparams[j*(atnr+1)].lj.c6;
174 c12j = idef->iparams[j*(atnr+1)].lj.c12;
175 c6 = sqrt(c6i * c6j);
176 if (fr->ljpme_combination_rule == eljpmeLB
177 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
179 sigmai = pow(c12i / c6i, 1.0/6.0);
180 sigmaj = pow(c12j / c6j, 1.0/6.0);
181 epsi = c6i * c6i / c12i;
182 epsj = c6j * c6j / c12j;
183 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
185 /* Store the elements at the same relative positions as C6 in nbfp in order
186 * to simplify access in the kernels
188 grid[2*(atnr*i+j)] = c6*6.0;
194 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
198 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
202 snew(nbfp, 2*atnr*atnr);
203 for (i = 0; i < atnr; ++i)
205 for (j = 0; j < atnr; ++j)
207 c6i = idef->iparams[i*(atnr+1)].lj.c6;
208 c12i = idef->iparams[i*(atnr+1)].lj.c12;
209 c6j = idef->iparams[j*(atnr+1)].lj.c6;
210 c12j = idef->iparams[j*(atnr+1)].lj.c12;
211 c6 = sqrt(c6i * c6j);
212 c12 = sqrt(c12i * c12j);
213 if (comb_rule == eCOMB_ARITHMETIC
214 && !gmx_numzero(c6) && !gmx_numzero(c12))
216 sigmai = pow(c12i / c6i, 1.0/6.0);
217 sigmaj = pow(c12j / c6j, 1.0/6.0);
218 epsi = c6i * c6i / c12i;
219 epsj = c6j * c6j / c12j;
220 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
221 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
223 C6(nbfp, atnr, i, j) = c6*6.0;
224 C12(nbfp, atnr, i, j) = c12*12.0;
230 /* This routine sets fr->solvent_opt to the most common solvent in the
231 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
232 * the fr->solvent_type array with the correct type (or esolNO).
234 * Charge groups that fulfill the conditions but are not identical to the
235 * most common one will be marked as esolNO in the solvent_type array.
237 * TIP3p is identical to SPC for these purposes, so we call it
238 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
240 * NOTE: QM particle should not
241 * become an optimized solvent. Not even if there is only one charge
251 } solvent_parameters_t;
254 check_solvent_cg(const gmx_moltype_t *molt,
257 const unsigned char *qm_grpnr,
258 const t_grps *qm_grps,
260 int *n_solvent_parameters,
261 solvent_parameters_t **solvent_parameters_p,
265 const t_blocka *excl;
272 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
273 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
276 solvent_parameters_t *solvent_parameters;
278 /* We use a list with parameters for each solvent type.
279 * Every time we discover a new molecule that fulfills the basic
280 * conditions for a solvent we compare with the previous entries
281 * in these lists. If the parameters are the same we just increment
282 * the counter for that type, and otherwise we create a new type
283 * based on the current molecule.
285 * Once we've finished going through all molecules we check which
286 * solvent is most common, and mark all those molecules while we
287 * clear the flag on all others.
290 solvent_parameters = *solvent_parameters_p;
292 /* Mark the cg first as non optimized */
295 /* Check if this cg has no exclusions with atoms in other charge groups
296 * and all atoms inside the charge group excluded.
297 * We only have 3 or 4 atom solvent loops.
299 if (GET_CGINFO_EXCL_INTER(cginfo) ||
300 !GET_CGINFO_EXCL_INTRA(cginfo))
305 /* Get the indices of the first atom in this charge group */
306 j0 = molt->cgs.index[cg0];
307 j1 = molt->cgs.index[cg0+1];
309 /* Number of atoms in our molecule */
315 "Moltype '%s': there are %d atoms in this charge group\n",
319 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
322 if (nj < 3 || nj > 4)
327 /* Check if we are doing QM on this group */
329 if (qm_grpnr != NULL)
331 for (j = j0; j < j1 && !qm; j++)
333 qm = (qm_grpnr[j] < qm_grps->nr - 1);
336 /* Cannot use solvent optimization with QM */
342 atom = molt->atoms.atom;
344 /* Still looks like a solvent, time to check parameters */
346 /* If it is perturbed (free energy) we can't use the solvent loops,
347 * so then we just skip to the next molecule.
351 for (j = j0; j < j1 && !perturbed; j++)
353 perturbed = PERTURBED(atom[j]);
361 /* Now it's only a question if the VdW and charge parameters
362 * are OK. Before doing the check we compare and see if they are
363 * identical to a possible previous solvent type.
364 * First we assign the current types and charges.
366 for (j = 0; j < nj; j++)
368 tmp_vdwtype[j] = atom[j0+j].type;
369 tmp_charge[j] = atom[j0+j].q;
372 /* Does it match any previous solvent type? */
373 for (k = 0; k < *n_solvent_parameters; k++)
378 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
379 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
380 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
385 /* Check that types & charges match for all atoms in molecule */
386 for (j = 0; j < nj && match == TRUE; j++)
388 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
392 if (tmp_charge[j] != solvent_parameters[k].charge[j])
399 /* Congratulations! We have a matched solvent.
400 * Flag it with this type for later processing.
403 solvent_parameters[k].count += nmol;
405 /* We are done with this charge group */
410 /* If we get here, we have a tentative new solvent type.
411 * Before we add it we must check that it fulfills the requirements
412 * of the solvent optimized loops. First determine which atoms have
415 for (j = 0; j < nj; j++)
418 tjA = tmp_vdwtype[j];
420 /* Go through all other tpes and see if any have non-zero
421 * VdW parameters when combined with this one.
423 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
425 /* We already checked that the atoms weren't perturbed,
426 * so we only need to check state A now.
430 has_vdw[j] = (has_vdw[j] ||
431 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
432 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
433 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
438 has_vdw[j] = (has_vdw[j] ||
439 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
440 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
445 /* Now we know all we need to make the final check and assignment. */
449 * For this we require thatn all atoms have charge,
450 * the charges on atom 2 & 3 should be the same, and only
451 * atom 1 might have VdW.
453 if (has_vdw[1] == FALSE &&
454 has_vdw[2] == FALSE &&
455 tmp_charge[0] != 0 &&
456 tmp_charge[1] != 0 &&
457 tmp_charge[2] == tmp_charge[1])
459 srenew(solvent_parameters, *n_solvent_parameters+1);
460 solvent_parameters[*n_solvent_parameters].model = esolSPC;
461 solvent_parameters[*n_solvent_parameters].count = nmol;
462 for (k = 0; k < 3; k++)
464 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
465 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
468 *cg_sp = *n_solvent_parameters;
469 (*n_solvent_parameters)++;
474 /* Or could it be a TIP4P?
475 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
476 * Only atom 1 mght have VdW.
478 if (has_vdw[1] == FALSE &&
479 has_vdw[2] == FALSE &&
480 has_vdw[3] == FALSE &&
481 tmp_charge[0] == 0 &&
482 tmp_charge[1] != 0 &&
483 tmp_charge[2] == tmp_charge[1] &&
486 srenew(solvent_parameters, *n_solvent_parameters+1);
487 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
488 solvent_parameters[*n_solvent_parameters].count = nmol;
489 for (k = 0; k < 4; k++)
491 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
492 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
495 *cg_sp = *n_solvent_parameters;
496 (*n_solvent_parameters)++;
500 *solvent_parameters_p = solvent_parameters;
504 check_solvent(FILE * fp,
505 const gmx_mtop_t * mtop,
507 cginfo_mb_t *cginfo_mb)
510 const t_block * mols;
511 const gmx_moltype_t *molt;
512 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
513 int n_solvent_parameters;
514 solvent_parameters_t *solvent_parameters;
520 fprintf(debug, "Going to determine what solvent types we have.\n");
525 n_solvent_parameters = 0;
526 solvent_parameters = NULL;
527 /* Allocate temporary array for solvent type */
528 snew(cg_sp, mtop->nmolblock);
532 for (mb = 0; mb < mtop->nmolblock; mb++)
534 molt = &mtop->moltype[mtop->molblock[mb].type];
536 /* Here we have to loop over all individual molecules
537 * because we need to check for QMMM particles.
539 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
540 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
541 nmol = mtop->molblock[mb].nmol/nmol_ch;
542 for (mol = 0; mol < nmol_ch; mol++)
545 am = mol*cgs->index[cgs->nr];
546 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
548 check_solvent_cg(molt, cg_mol, nmol,
549 mtop->groups.grpnr[egcQMMM] ?
550 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
551 &mtop->groups.grps[egcQMMM],
553 &n_solvent_parameters, &solvent_parameters,
554 cginfo_mb[mb].cginfo[cgm+cg_mol],
555 &cg_sp[mb][cgm+cg_mol]);
558 cg_offset += cgs->nr;
559 at_offset += cgs->index[cgs->nr];
562 /* Puh! We finished going through all charge groups.
563 * Now find the most common solvent model.
566 /* Most common solvent this far */
568 for (i = 0; i < n_solvent_parameters; i++)
571 solvent_parameters[i].count > solvent_parameters[bestsp].count)
579 bestsol = solvent_parameters[bestsp].model;
586 #ifdef DISABLE_WATER_NLIST
591 for (mb = 0; mb < mtop->nmolblock; mb++)
593 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
594 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
595 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
597 if (cg_sp[mb][i] == bestsp)
599 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
604 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
611 if (bestsol != esolNO && fp != NULL)
613 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
615 solvent_parameters[bestsp].count);
618 sfree(solvent_parameters);
619 fr->solvent_opt = bestsol;
623 acNONE = 0, acCONSTRAINT, acSETTLE
626 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
627 t_forcerec *fr, gmx_bool bNoSolvOpt,
628 gmx_bool *bFEP_NonBonded,
629 gmx_bool *bExcl_IntraCGAll_InterCGNone)
632 const t_blocka *excl;
633 const gmx_moltype_t *molt;
634 const gmx_molblock_t *molb;
635 cginfo_mb_t *cginfo_mb;
638 int cg_offset, a_offset, cgm, am;
639 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
643 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
645 ncg_tot = ncg_mtop(mtop);
646 snew(cginfo_mb, mtop->nmolblock);
648 snew(type_VDW, fr->ntype);
649 for (ai = 0; ai < fr->ntype; ai++)
651 type_VDW[ai] = FALSE;
652 for (j = 0; j < fr->ntype; j++)
654 type_VDW[ai] = type_VDW[ai] ||
656 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
657 C12(fr->nbfp, fr->ntype, ai, j) != 0;
661 *bFEP_NonBonded = FALSE;
662 *bExcl_IntraCGAll_InterCGNone = TRUE;
665 snew(bExcl, excl_nalloc);
668 for (mb = 0; mb < mtop->nmolblock; mb++)
670 molb = &mtop->molblock[mb];
671 molt = &mtop->moltype[molb->type];
675 /* Check if the cginfo is identical for all molecules in this block.
676 * If so, we only need an array of the size of one molecule.
677 * Otherwise we make an array of #mol times #cgs per molecule.
681 for (m = 0; m < molb->nmol; m++)
683 am = m*cgs->index[cgs->nr];
684 for (cg = 0; cg < cgs->nr; cg++)
687 a1 = cgs->index[cg+1];
688 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
689 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
693 if (mtop->groups.grpnr[egcQMMM] != NULL)
695 for (ai = a0; ai < a1; ai++)
697 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
698 mtop->groups.grpnr[egcQMMM][a_offset +ai])
707 cginfo_mb[mb].cg_start = cg_offset;
708 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
709 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
710 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
711 cginfo = cginfo_mb[mb].cginfo;
713 /* Set constraints flags for constrained atoms */
714 snew(a_con, molt->atoms.nr);
715 for (ftype = 0; ftype < F_NRE; ftype++)
717 if (interaction_function[ftype].flags & IF_CONSTRAINT)
722 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
726 for (a = 0; a < nral; a++)
728 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
729 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
735 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
738 am = m*cgs->index[cgs->nr];
739 for (cg = 0; cg < cgs->nr; cg++)
742 a1 = cgs->index[cg+1];
744 /* Store the energy group in cginfo */
745 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
746 SET_CGINFO_GID(cginfo[cgm+cg], gid);
748 /* Check the intra/inter charge group exclusions */
749 if (a1-a0 > excl_nalloc)
751 excl_nalloc = a1 - a0;
752 srenew(bExcl, excl_nalloc);
754 /* bExclIntraAll: all intra cg interactions excluded
755 * bExclInter: any inter cg interactions excluded
757 bExclIntraAll = TRUE;
761 bHavePerturbedAtoms = FALSE;
762 for (ai = a0; ai < a1; ai++)
764 /* Check VDW and electrostatic interactions */
765 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
766 type_VDW[molt->atoms.atom[ai].typeB]);
767 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
768 molt->atoms.atom[ai].qB != 0);
770 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
772 /* Clear the exclusion list for atom ai */
773 for (aj = a0; aj < a1; aj++)
775 bExcl[aj-a0] = FALSE;
777 /* Loop over all the exclusions of atom ai */
778 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
781 if (aj < a0 || aj >= a1)
790 /* Check if ai excludes a0 to a1 */
791 for (aj = a0; aj < a1; aj++)
795 bExclIntraAll = FALSE;
802 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
805 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
813 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
817 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
819 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
821 /* The size in cginfo is currently only read with DD */
822 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
826 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
830 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
832 if (bHavePerturbedAtoms && fr->efep != efepNO)
834 SET_CGINFO_FEP(cginfo[cgm+cg]);
835 *bFEP_NonBonded = TRUE;
837 /* Store the charge group size */
838 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
840 if (!bExclIntraAll || bExclInter)
842 *bExcl_IntraCGAll_InterCGNone = FALSE;
849 cg_offset += molb->nmol*cgs->nr;
850 a_offset += molb->nmol*cgs->index[cgs->nr];
854 /* the solvent optimizer is called after the QM is initialized,
855 * because we don't want to have the QM subsystemto become an
859 check_solvent(fplog, mtop, fr, cginfo_mb);
861 if (getenv("GMX_NO_SOLV_OPT"))
865 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
866 "Disabling all solvent optimization\n");
868 fr->solvent_opt = esolNO;
872 fr->solvent_opt = esolNO;
874 if (!fr->solvent_opt)
876 for (mb = 0; mb < mtop->nmolblock; mb++)
878 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
880 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
888 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
893 ncg = cgi_mb[nmb-1].cg_end;
896 for (cg = 0; cg < ncg; cg++)
898 while (cg >= cgi_mb[mb].cg_end)
903 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
909 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
911 /*This now calculates sum for q and c6*/
912 double qsum, q2sum, q, c6sum, c6;
914 const t_atoms *atoms;
919 for (mb = 0; mb < mtop->nmolblock; mb++)
921 nmol = mtop->molblock[mb].nmol;
922 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
923 for (i = 0; i < atoms->nr; i++)
925 q = atoms->atom[i].q;
928 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
933 fr->q2sum[0] = q2sum;
934 fr->c6sum[0] = c6sum;
936 if (fr->efep != efepNO)
941 for (mb = 0; mb < mtop->nmolblock; mb++)
943 nmol = mtop->molblock[mb].nmol;
944 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
945 for (i = 0; i < atoms->nr; i++)
947 q = atoms->atom[i].qB;
950 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
954 fr->q2sum[1] = q2sum;
955 fr->c6sum[1] = c6sum;
960 fr->qsum[1] = fr->qsum[0];
961 fr->q2sum[1] = fr->q2sum[0];
962 fr->c6sum[1] = fr->c6sum[0];
966 if (fr->efep == efepNO)
968 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
972 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
973 fr->qsum[0], fr->qsum[1]);
978 void update_forcerec(t_forcerec *fr, matrix box)
980 if (fr->eeltype == eelGRF)
982 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
983 fr->rcoulomb, fr->temp, fr->zsquare, box,
984 &fr->kappa, &fr->k_rf, &fr->c_rf);
988 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
990 const t_atoms *atoms, *atoms_tpi;
991 const t_blocka *excl;
992 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
993 gmx_int64_t npair, npair_ij, tmpi, tmpj;
994 double csix, ctwelve;
998 real *nbfp_comb = NULL;
1004 /* For LJ-PME, we want to correct for the difference between the
1005 * actual C6 values and the C6 values used by the LJ-PME based on
1006 * combination rules. */
1008 if (EVDW_PME(fr->vdwtype))
1010 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1011 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1012 for (tpi = 0; tpi < ntp; ++tpi)
1014 for (tpj = 0; tpj < ntp; ++tpj)
1016 C6(nbfp_comb, ntp, tpi, tpj) =
1017 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1018 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1023 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1031 /* Count the types so we avoid natoms^2 operations */
1032 snew(typecount, ntp);
1033 gmx_mtop_count_atomtypes(mtop, q, typecount);
1035 for (tpi = 0; tpi < ntp; tpi++)
1037 for (tpj = tpi; tpj < ntp; tpj++)
1039 tmpi = typecount[tpi];
1040 tmpj = typecount[tpj];
1043 npair_ij = tmpi*tmpj;
1047 npair_ij = tmpi*(tmpi - 1)/2;
1051 /* nbfp now includes the 6.0 derivative prefactor */
1052 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1056 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1057 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1058 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1064 /* Subtract the excluded pairs.
1065 * The main reason for substracting exclusions is that in some cases
1066 * some combinations might never occur and the parameters could have
1067 * any value. These unused values should not influence the dispersion
1070 for (mb = 0; mb < mtop->nmolblock; mb++)
1072 nmol = mtop->molblock[mb].nmol;
1073 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1074 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1075 for (i = 0; (i < atoms->nr); i++)
1079 tpi = atoms->atom[i].type;
1083 tpi = atoms->atom[i].typeB;
1085 j1 = excl->index[i];
1086 j2 = excl->index[i+1];
1087 for (j = j1; j < j2; j++)
1094 tpj = atoms->atom[k].type;
1098 tpj = atoms->atom[k].typeB;
1102 /* nbfp now includes the 6.0 derivative prefactor */
1103 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1107 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1108 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1109 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1119 /* Only correct for the interaction of the test particle
1120 * with the rest of the system.
1123 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1126 for (mb = 0; mb < mtop->nmolblock; mb++)
1128 nmol = mtop->molblock[mb].nmol;
1129 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1130 for (j = 0; j < atoms->nr; j++)
1133 /* Remove the interaction of the test charge group
1136 if (mb == mtop->nmolblock-1)
1140 if (mb == 0 && nmol == 1)
1142 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1147 tpj = atoms->atom[j].type;
1151 tpj = atoms->atom[j].typeB;
1153 for (i = 0; i < fr->n_tpi; i++)
1157 tpi = atoms_tpi->atom[i].type;
1161 tpi = atoms_tpi->atom[i].typeB;
1165 /* nbfp now includes the 6.0 derivative prefactor */
1166 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1170 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1171 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1172 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1179 if (npair - nexcl <= 0 && fplog)
1181 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1187 csix /= npair - nexcl;
1188 ctwelve /= npair - nexcl;
1192 fprintf(debug, "Counted %d exclusions\n", nexcl);
1193 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1194 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1196 fr->avcsix[q] = csix;
1197 fr->avctwelve[q] = ctwelve;
1200 if (EVDW_PME(fr->vdwtype))
1207 if (fr->eDispCorr == edispcAllEner ||
1208 fr->eDispCorr == edispcAllEnerPres)
1210 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1211 fr->avcsix[0], fr->avctwelve[0]);
1215 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1221 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1222 const gmx_mtop_t *mtop)
1224 const t_atoms *at1, *at2;
1225 int mt1, mt2, i, j, tpi, tpj, ntypes;
1231 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1238 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1240 at1 = &mtop->moltype[mt1].atoms;
1241 for (i = 0; (i < at1->nr); i++)
1243 tpi = at1->atom[i].type;
1246 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1249 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1251 at2 = &mtop->moltype[mt2].atoms;
1252 for (j = 0; (j < at2->nr); j++)
1254 tpj = at2->atom[j].type;
1257 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1259 b = BHAMB(nbfp, ntypes, tpi, tpj);
1260 if (b > fr->bham_b_max)
1264 if ((b < bmin) || (bmin == -1))
1274 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1275 bmin, fr->bham_b_max);
1279 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1280 t_forcerec *fr, real rtab,
1281 const t_commrec *cr,
1282 const char *tabfn, char *eg1, char *eg2,
1292 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1297 sprintf(buf, "%s", tabfn);
1300 /* Append the two energy group names */
1301 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1302 eg1, eg2, ftp2ext(efXVG));
1304 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1305 /* Copy the contents of the table to separate coulomb and LJ tables too,
1306 * to improve cache performance.
1308 /* For performance reasons we want
1309 * the table data to be aligned to 16-byte. The pointers could be freed
1310 * but currently aren't.
1312 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1313 nbl->table_elec.format = nbl->table_elec_vdw.format;
1314 nbl->table_elec.r = nbl->table_elec_vdw.r;
1315 nbl->table_elec.n = nbl->table_elec_vdw.n;
1316 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1317 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1318 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1319 nbl->table_elec.ninteractions = 1;
1320 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1321 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1323 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1324 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1325 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1326 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1327 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1328 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1329 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1330 nbl->table_vdw.ninteractions = 2;
1331 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1332 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1334 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1336 for (j = 0; j < 4; j++)
1338 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1340 for (j = 0; j < 8; j++)
1342 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1347 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1348 int *ncount, int **count)
1350 const gmx_moltype_t *molt;
1352 int mt, ftype, stride, i, j, tabnr;
1354 for (mt = 0; mt < mtop->nmoltype; mt++)
1356 molt = &mtop->moltype[mt];
1357 for (ftype = 0; ftype < F_NRE; ftype++)
1359 if (ftype == ftype1 || ftype == ftype2)
1361 il = &molt->ilist[ftype];
1362 stride = 1 + NRAL(ftype);
1363 for (i = 0; i < il->nr; i += stride)
1365 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1368 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1370 if (tabnr >= *ncount)
1372 srenew(*count, tabnr+1);
1373 for (j = *ncount; j < tabnr+1; j++)
1386 static bondedtable_t *make_bonded_tables(FILE *fplog,
1387 int ftype1, int ftype2,
1388 const gmx_mtop_t *mtop,
1389 const char *basefn, const char *tabext)
1391 int i, ncount, *count;
1399 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1404 for (i = 0; i < ncount; i++)
1408 sprintf(tabfn, "%s", basefn);
1409 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1410 tabext, i, ftp2ext(efXVG));
1411 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1420 void forcerec_set_ranges(t_forcerec *fr,
1421 int ncg_home, int ncg_force,
1423 int natoms_force_constr, int natoms_f_novirsum)
1428 /* fr->ncg_force is unused in the standard code,
1429 * but it can be useful for modified code dealing with charge groups.
1431 fr->ncg_force = ncg_force;
1432 fr->natoms_force = natoms_force;
1433 fr->natoms_force_constr = natoms_force_constr;
1435 if (fr->natoms_force_constr > fr->nalloc_force)
1437 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1441 srenew(fr->f_twin, fr->nalloc_force);
1445 if (fr->bF_NoVirSum)
1447 fr->f_novirsum_n = natoms_f_novirsum;
1448 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1450 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1451 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1456 fr->f_novirsum_n = 0;
1460 static real cutoff_inf(real cutoff)
1464 cutoff = GMX_CUTOFF_INF;
1470 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1471 t_forcerec *fr, const t_inputrec *ir,
1472 const char *tabfn, const gmx_mtop_t *mtop,
1480 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1484 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1486 sprintf(buf, "%s", tabfn);
1487 for (i = 0; i < ir->adress->n_tf_grps; i++)
1489 j = ir->adress->tf_table_index[i]; /* get energy group index */
1490 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1491 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1494 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1496 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1501 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1508 ir->rcoulomb == 0 &&
1510 ir->ePBC == epbcNONE &&
1511 ir->vdwtype == evdwCUT &&
1512 ir->coulombtype == eelCUT &&
1513 ir->efep == efepNO &&
1514 (ir->implicit_solvent == eisNO ||
1515 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1516 ir->gb_algorithm == egbHCT ||
1517 ir->gb_algorithm == egbOBC))) &&
1518 getenv("GMX_NO_ALLVSALL") == NULL
1521 if (bAllvsAll && ir->opts.ngener > 1)
1523 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1529 fprintf(stderr, "\n%s\n", note);
1533 fprintf(fp, "\n%s\n", note);
1539 if (bAllvsAll && fp && MASTER(cr))
1541 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1548 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1552 /* These thread local data structures are used for bondeds only */
1553 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1555 if (fr->nthreads > 1)
1557 snew(fr->f_t, fr->nthreads);
1558 /* Thread 0 uses the global force and energy arrays */
1559 for (t = 1; t < fr->nthreads; t++)
1561 fr->f_t[t].f = NULL;
1562 fr->f_t[t].f_nalloc = 0;
1563 snew(fr->f_t[t].fshift, SHIFTS);
1564 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1565 for (i = 0; i < egNR; i++)
1567 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1574 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1575 const t_commrec *cr,
1576 const t_inputrec *ir,
1579 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1581 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1582 bGPU ? "GPUs" : "SIMD kernels",
1583 bGPU ? "CPU only" : "plain-C kernels");
1591 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1595 *kernel_type = nbnxnk4x4_PlainC;
1596 *ewald_excl = ewaldexclTable;
1598 #ifdef GMX_NBNXN_SIMD
1600 #ifdef GMX_NBNXN_SIMD_4XN
1601 *kernel_type = nbnxnk4xN_SIMD_4xN;
1603 #ifdef GMX_NBNXN_SIMD_2XNN
1604 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1607 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1608 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1609 * Currently this is based on the SIMD acceleration choice,
1610 * but it might be better to decide this at runtime based on CPU.
1612 * 4xN calculates more (zero) interactions, but has less pair-search
1613 * work and much better kernel instruction scheduling.
1615 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1616 * which doesn't have FMA, both the analytical and tabulated Ewald
1617 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1618 * 2x(4+4) because it results in significantly fewer pairs.
1619 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1620 * 10% with HT, 50% without HT. As we currently don't detect the actual
1621 * use of HT, use 4x8 to avoid a potential performance hit.
1622 * On Intel Haswell 4x8 is always faster.
1624 *kernel_type = nbnxnk4xN_SIMD_4xN;
1626 #ifndef GMX_SIMD_HAVE_FMA
1627 if (EEL_PME_EWALD(ir->coulombtype) ||
1628 EVDW_PME(ir->vdwtype))
1630 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1631 * There are enough instructions to make 2x(4+4) efficient.
1633 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1636 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1639 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1641 #ifdef GMX_NBNXN_SIMD_4XN
1642 *kernel_type = nbnxnk4xN_SIMD_4xN;
1644 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1647 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1649 #ifdef GMX_NBNXN_SIMD_2XNN
1650 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1652 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1656 /* Analytical Ewald exclusion correction is only an option in
1658 * Since table lookup's don't parallelize with SIMD, analytical
1659 * will probably always be faster for a SIMD width of 8 or more.
1660 * With FMA analytical is sometimes faster for a width if 4 as well.
1661 * On BlueGene/Q, this is faster regardless of precision.
1662 * In single precision, this is faster on Bulldozer.
1664 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1665 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1666 defined GMX_SIMD_IBM_QPX
1667 *ewald_excl = ewaldexclAnalytical;
1669 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1671 *ewald_excl = ewaldexclTable;
1673 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1675 *ewald_excl = ewaldexclAnalytical;
1679 #endif /* GMX_NBNXN_SIMD */
1683 const char *lookup_nbnxn_kernel_name(int kernel_type)
1685 const char *returnvalue = NULL;
1686 switch (kernel_type)
1689 returnvalue = "not set";
1691 case nbnxnk4x4_PlainC:
1692 returnvalue = "plain C";
1694 case nbnxnk4xN_SIMD_4xN:
1695 case nbnxnk4xN_SIMD_2xNN:
1696 #ifdef GMX_NBNXN_SIMD
1697 #if defined GMX_SIMD_X86_SSE2
1698 returnvalue = "SSE2";
1699 #elif defined GMX_SIMD_X86_SSE4_1
1700 returnvalue = "SSE4.1";
1701 #elif defined GMX_SIMD_X86_AVX_128_FMA
1702 returnvalue = "AVX_128_FMA";
1703 #elif defined GMX_SIMD_X86_AVX_256
1704 returnvalue = "AVX_256";
1705 #elif defined GMX_SIMD_X86_AVX2_256
1706 returnvalue = "AVX2_256";
1708 returnvalue = "SIMD";
1710 #else /* GMX_NBNXN_SIMD */
1711 returnvalue = "not available";
1712 #endif /* GMX_NBNXN_SIMD */
1714 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1715 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1719 gmx_fatal(FARGS, "Illegal kernel type selected");
1726 static void pick_nbnxn_kernel(FILE *fp,
1727 const t_commrec *cr,
1728 gmx_bool use_simd_kernels,
1730 gmx_bool bEmulateGPU,
1731 const t_inputrec *ir,
1734 gmx_bool bDoNonbonded)
1736 assert(kernel_type);
1738 *kernel_type = nbnxnkNotSet;
1739 *ewald_excl = ewaldexclTable;
1743 *kernel_type = nbnxnk8x8x8_PlainC;
1747 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1752 *kernel_type = nbnxnk8x8x8_CUDA;
1755 if (*kernel_type == nbnxnkNotSet)
1757 /* LJ PME with LB combination rule does 7 mesh operations.
1758 * This so slow that we don't compile SIMD non-bonded kernels for that.
1760 if (use_simd_kernels &&
1761 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1763 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1767 *kernel_type = nbnxnk4x4_PlainC;
1771 if (bDoNonbonded && fp != NULL)
1773 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1774 lookup_nbnxn_kernel_name(*kernel_type),
1775 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1776 nbnxn_kernel_to_cj_size(*kernel_type));
1778 if (nbnxnk4x4_PlainC == *kernel_type ||
1779 nbnxnk8x8x8_PlainC == *kernel_type)
1781 md_print_warn(cr, fp,
1782 "WARNING: Using the slow %s kernels. This should\n"
1783 "not happen during routine usage on supported platforms.\n\n",
1784 lookup_nbnxn_kernel_name(*kernel_type));
1789 static void pick_nbnxn_resources(const t_commrec *cr,
1790 const gmx_hw_info_t *hwinfo,
1791 gmx_bool bDoNonbonded,
1793 gmx_bool *bEmulateGPU,
1794 const gmx_gpu_opt_t *gpu_opt)
1796 gmx_bool bEmulateGPUEnvVarSet;
1797 char gpu_err_str[STRLEN];
1801 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1803 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1804 * GPUs (currently) only handle non-bonded calculations, we will
1805 * automatically switch to emulation if non-bonded calculations are
1806 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1807 * way to turn off GPU initialization, data movement, and cleanup.
1809 * GPU emulation can be useful to assess the performance one can expect by
1810 * adding GPU(s) to the machine. The conditional below allows this even
1811 * if mdrun is compiled without GPU acceleration support.
1812 * Note that you should freezing the system as otherwise it will explode.
1814 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1816 gpu_opt->ncuda_dev_use > 0));
1818 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1820 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1822 /* Each PP node will use the intra-node id-th device from the
1823 * list of detected/selected GPUs. */
1824 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1825 &hwinfo->gpu_info, gpu_opt))
1827 /* At this point the init should never fail as we made sure that
1828 * we have all the GPUs we need. If it still does, we'll bail. */
1829 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1831 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1832 cr->rank_pp_intranode),
1836 /* Here we actually turn on hardware GPU acceleration */
1841 gmx_bool uses_simple_tables(int cutoff_scheme,
1842 nonbonded_verlet_t *nbv,
1845 gmx_bool bUsesSimpleTables = TRUE;
1848 switch (cutoff_scheme)
1851 bUsesSimpleTables = TRUE;
1854 assert(NULL != nbv && NULL != nbv->grp);
1855 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1856 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1859 gmx_incons("unimplemented");
1861 return bUsesSimpleTables;
1864 static void init_ewald_f_table(interaction_const_t *ic,
1865 gmx_bool bUsesSimpleTables,
1870 if (bUsesSimpleTables)
1872 /* Get the Ewald table spacing based on Coulomb and/or LJ
1873 * Ewald coefficients and rtol.
1875 ic->tabq_scale = ewald_spline3_table_scale(ic);
1877 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1878 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1882 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1883 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1884 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1887 sfree_aligned(ic->tabq_coul_FDV0);
1888 sfree_aligned(ic->tabq_coul_F);
1889 sfree_aligned(ic->tabq_coul_V);
1891 sfree_aligned(ic->tabq_vdw_FDV0);
1892 sfree_aligned(ic->tabq_vdw_F);
1893 sfree_aligned(ic->tabq_vdw_V);
1895 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1897 /* Create the original table data in FDV0 */
1898 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1899 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1900 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1901 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1902 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1905 if (EVDW_PME(ic->vdwtype))
1907 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1908 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1909 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1910 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1911 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1915 void init_interaction_const_tables(FILE *fp,
1916 interaction_const_t *ic,
1917 gmx_bool bUsesSimpleTables,
1922 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1924 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1928 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1929 1/ic->tabq_scale, ic->tabq_size);
1934 static void clear_force_switch_constants(shift_consts_t *sc)
1941 static void force_switch_constants(real p,
1945 /* Here we determine the coefficient for shifting the force to zero
1946 * between distance rsw and the cut-off rc.
1947 * For a potential of r^-p, we have force p*r^-(p+1).
1948 * But to save flops we absorb p in the coefficient.
1950 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1951 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1953 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1954 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1955 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1958 static void potential_switch_constants(real rsw, real rc,
1959 switch_consts_t *sc)
1961 /* The switch function is 1 at rsw and 0 at rc.
1962 * The derivative and second derivate are zero at both ends.
1963 * rsw = max(r - r_switch, 0)
1964 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1965 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1966 * force = force*dsw - potential*sw
1969 sc->c3 = -10*pow(rc - rsw, -3);
1970 sc->c4 = 15*pow(rc - rsw, -4);
1971 sc->c5 = -6*pow(rc - rsw, -5);
1975 init_interaction_const(FILE *fp,
1976 const t_commrec gmx_unused *cr,
1977 interaction_const_t **interaction_const,
1978 const t_forcerec *fr,
1981 interaction_const_t *ic;
1982 gmx_bool bUsesSimpleTables = TRUE;
1986 /* Just allocate something so we can free it */
1987 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1988 snew_aligned(ic->tabq_coul_F, 16, 32);
1989 snew_aligned(ic->tabq_coul_V, 16, 32);
1991 ic->rlist = fr->rlist;
1992 ic->rlistlong = fr->rlistlong;
1995 ic->vdwtype = fr->vdwtype;
1996 ic->vdw_modifier = fr->vdw_modifier;
1997 ic->rvdw = fr->rvdw;
1998 ic->rvdw_switch = fr->rvdw_switch;
1999 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
2000 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2001 ic->sh_lj_ewald = 0;
2002 clear_force_switch_constants(&ic->dispersion_shift);
2003 clear_force_switch_constants(&ic->repulsion_shift);
2005 switch (ic->vdw_modifier)
2007 case eintmodPOTSHIFT:
2008 /* Only shift the potential, don't touch the force */
2009 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2010 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2011 if (EVDW_PME(ic->vdwtype))
2015 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2016 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2019 case eintmodFORCESWITCH:
2020 /* Switch the force, switch and shift the potential */
2021 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2022 &ic->dispersion_shift);
2023 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2024 &ic->repulsion_shift);
2026 case eintmodPOTSWITCH:
2027 /* Switch the potential and force */
2028 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2032 case eintmodEXACTCUTOFF:
2033 /* Nothing to do here */
2036 gmx_incons("unimplemented potential modifier");
2039 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2041 /* Electrostatics */
2042 ic->eeltype = fr->eeltype;
2043 ic->coulomb_modifier = fr->coulomb_modifier;
2044 ic->rcoulomb = fr->rcoulomb;
2045 ic->epsilon_r = fr->epsilon_r;
2046 ic->epsfac = fr->epsfac;
2047 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2049 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2051 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2058 /* Reaction-field */
2059 if (EEL_RF(ic->eeltype))
2061 ic->epsilon_rf = fr->epsilon_rf;
2062 ic->k_rf = fr->k_rf;
2063 ic->c_rf = fr->c_rf;
2067 /* For plain cut-off we might use the reaction-field kernels */
2068 ic->epsilon_rf = ic->epsilon_r;
2070 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2072 ic->c_rf = 1/ic->rcoulomb;
2082 real dispersion_shift;
2084 dispersion_shift = ic->dispersion_shift.cpot;
2085 if (EVDW_PME(ic->vdwtype))
2087 dispersion_shift -= ic->sh_lj_ewald;
2089 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2090 ic->repulsion_shift.cpot, dispersion_shift);
2092 if (ic->eeltype == eelCUT)
2094 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2096 else if (EEL_PME(ic->eeltype))
2098 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2103 *interaction_const = ic;
2105 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2107 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2109 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2110 * also sharing texture references. To keep the code simple, we don't
2111 * treat texture references as shared resources, but this means that
2112 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2113 * Hence, to ensure that the non-bonded kernels don't start before all
2114 * texture binding operations are finished, we need to wait for all ranks
2115 * to arrive here before continuing.
2117 * Note that we could omit this barrier if GPUs are not shared (or
2118 * texture objects are used), but as this is initialization code, there
2119 * is not point in complicating things.
2121 #ifdef GMX_THREAD_MPI
2126 #endif /* GMX_THREAD_MPI */
2129 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2130 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2133 static void init_nb_verlet(FILE *fp,
2134 nonbonded_verlet_t **nb_verlet,
2135 gmx_bool bFEP_NonBonded,
2136 const t_inputrec *ir,
2137 const t_forcerec *fr,
2138 const t_commrec *cr,
2139 const char *nbpu_opt)
2141 nonbonded_verlet_t *nbv;
2144 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2146 nbnxn_alloc_t *nb_alloc;
2147 nbnxn_free_t *nb_free;
2151 pick_nbnxn_resources(cr, fr->hwinfo,
2159 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2160 for (i = 0; i < nbv->ngrp; i++)
2162 nbv->grp[i].nbl_lists.nnbl = 0;
2163 nbv->grp[i].nbat = NULL;
2164 nbv->grp[i].kernel_type = nbnxnkNotSet;
2166 if (i == 0) /* local */
2168 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2169 nbv->bUseGPU, bEmulateGPU, ir,
2170 &nbv->grp[i].kernel_type,
2171 &nbv->grp[i].ewald_excl,
2174 else /* non-local */
2176 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2178 /* Use GPU for local, select a CPU kernel for non-local */
2179 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2181 &nbv->grp[i].kernel_type,
2182 &nbv->grp[i].ewald_excl,
2185 bHybridGPURun = TRUE;
2189 /* Use the same kernel for local and non-local interactions */
2190 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2191 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2198 /* init the NxN GPU data; the last argument tells whether we'll have
2199 * both local and non-local NB calculation on GPU */
2200 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2201 &fr->hwinfo->gpu_info, fr->gpu_opt,
2202 cr->rank_pp_intranode,
2203 (nbv->ngrp > 1) && !bHybridGPURun);
2205 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2209 nbv->min_ci_balanced = strtol(env, &end, 10);
2210 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2212 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2217 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2218 nbv->min_ci_balanced);
2223 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2226 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2227 nbv->min_ci_balanced);
2233 nbv->min_ci_balanced = 0;
2238 nbnxn_init_search(&nbv->nbs,
2239 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2240 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2242 gmx_omp_nthreads_get(emntPairsearch));
2244 for (i = 0; i < nbv->ngrp; i++)
2246 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2248 nb_alloc = &pmalloc;
2257 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2258 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2259 /* 8x8x8 "non-simple" lists are ATM always combined */
2260 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2264 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2266 gmx_bool bSimpleList;
2267 int enbnxninitcombrule;
2269 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2271 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2273 /* Plain LJ cut-off: we can optimize with combination rules */
2274 enbnxninitcombrule = enbnxninitcombruleDETECT;
2276 else if (fr->vdwtype == evdwPME)
2278 /* LJ-PME: we need to use a combination rule for the grid */
2279 if (fr->ljpme_combination_rule == eljpmeGEOM)
2281 enbnxninitcombrule = enbnxninitcombruleGEOM;
2285 enbnxninitcombrule = enbnxninitcombruleLB;
2290 /* We use a full combination matrix: no rule required */
2291 enbnxninitcombrule = enbnxninitcombruleNONE;
2295 snew(nbv->grp[i].nbat, 1);
2296 nbnxn_atomdata_init(fp,
2298 nbv->grp[i].kernel_type,
2300 fr->ntype, fr->nbfp,
2302 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2307 nbv->grp[i].nbat = nbv->grp[0].nbat;
2312 void init_forcerec(FILE *fp,
2313 const output_env_t oenv,
2316 const t_inputrec *ir,
2317 const gmx_mtop_t *mtop,
2318 const t_commrec *cr,
2324 const char *nbpu_opt,
2325 gmx_bool bNoSolvOpt,
2328 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2333 gmx_bool bGenericKernelOnly;
2334 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2335 gmx_bool bFEP_NonBonded;
2337 int *nm_ind, egp_flags;
2339 if (fr->hwinfo == NULL)
2341 /* Detect hardware, gather information.
2342 * In mdrun, hwinfo has already been set before calling init_forcerec.
2343 * Here we ignore GPUs, as tools will not use them anyhow.
2345 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2348 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2349 fr->use_simd_kernels = TRUE;
2351 fr->bDomDec = DOMAINDECOMP(cr);
2353 natoms = mtop->natoms;
2355 if (check_box(ir->ePBC, box))
2357 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2360 /* Test particle insertion ? */
2363 /* Set to the size of the molecule to be inserted (the last one) */
2364 /* Because of old style topologies, we have to use the last cg
2365 * instead of the last molecule type.
2367 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2368 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2369 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2371 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2379 /* Copy AdResS parameters */
2382 fr->adress_type = ir->adress->type;
2383 fr->adress_const_wf = ir->adress->const_wf;
2384 fr->adress_ex_width = ir->adress->ex_width;
2385 fr->adress_hy_width = ir->adress->hy_width;
2386 fr->adress_icor = ir->adress->icor;
2387 fr->adress_site = ir->adress->site;
2388 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2389 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2392 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2393 for (i = 0; i < ir->adress->n_energy_grps; i++)
2395 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2398 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2399 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2400 for (i = 0; i < fr->n_adress_tf_grps; i++)
2402 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2404 copy_rvec(ir->adress->refs, fr->adress_refs);
2408 fr->adress_type = eAdressOff;
2409 fr->adress_do_hybridpairs = FALSE;
2412 /* Copy the user determined parameters */
2413 fr->userint1 = ir->userint1;
2414 fr->userint2 = ir->userint2;
2415 fr->userint3 = ir->userint3;
2416 fr->userint4 = ir->userint4;
2417 fr->userreal1 = ir->userreal1;
2418 fr->userreal2 = ir->userreal2;
2419 fr->userreal3 = ir->userreal3;
2420 fr->userreal4 = ir->userreal4;
2423 fr->fc_stepsize = ir->fc_stepsize;
2426 fr->efep = ir->efep;
2427 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2428 if (ir->fepvals->bScCoul)
2430 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2431 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2435 fr->sc_alphacoul = 0;
2436 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2438 fr->sc_power = ir->fepvals->sc_power;
2439 fr->sc_r_power = ir->fepvals->sc_r_power;
2440 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2442 env = getenv("GMX_SCSIGMA_MIN");
2446 sscanf(env, "%lf", &dbl);
2447 fr->sc_sigma6_min = pow(dbl, 6);
2450 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2454 fr->bNonbonded = TRUE;
2455 if (getenv("GMX_NO_NONBONDED") != NULL)
2457 /* turn off non-bonded calculations */
2458 fr->bNonbonded = FALSE;
2459 md_print_warn(cr, fp,
2460 "Found environment variable GMX_NO_NONBONDED.\n"
2461 "Disabling nonbonded calculations.\n");
2464 bGenericKernelOnly = FALSE;
2466 /* We now check in the NS code whether a particular combination of interactions
2467 * can be used with water optimization, and disable it if that is not the case.
2470 if (getenv("GMX_NB_GENERIC") != NULL)
2475 "Found environment variable GMX_NB_GENERIC.\n"
2476 "Disabling all interaction-specific nonbonded kernels, will only\n"
2477 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2479 bGenericKernelOnly = TRUE;
2482 if (bGenericKernelOnly == TRUE)
2487 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2489 fr->use_simd_kernels = FALSE;
2493 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2494 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2498 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2500 /* Check if we can/should do all-vs-all kernels */
2501 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2502 fr->AllvsAll_work = NULL;
2503 fr->AllvsAll_workgb = NULL;
2505 /* All-vs-all kernels have not been implemented in 4.6, and
2506 * the SIMD group kernels are also buggy in this case. Non-SIMD
2507 * group kernels are OK. See Redmine #1249. */
2510 fr->bAllvsAll = FALSE;
2511 fr->use_simd_kernels = FALSE;
2515 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2516 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2517 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2518 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2519 "we are proceeding by disabling all CPU architecture-specific\n"
2520 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2521 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2525 /* Neighbour searching stuff */
2526 fr->cutoff_scheme = ir->cutoff_scheme;
2527 fr->bGrid = (ir->ns_type == ensGRID);
2528 fr->ePBC = ir->ePBC;
2530 if (fr->cutoff_scheme == ecutsGROUP)
2532 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2533 "removed in a future release when 'verlet' supports all interaction forms.\n";
2537 fprintf(stderr, "\n%s\n", note);
2541 fprintf(fp, "\n%s\n", note);
2545 /* Determine if we will do PBC for distances in bonded interactions */
2546 if (fr->ePBC == epbcNONE)
2548 fr->bMolPBC = FALSE;
2552 if (!DOMAINDECOMP(cr))
2554 /* The group cut-off scheme and SHAKE assume charge groups
2555 * are whole, but not using molpbc is faster in most cases.
2557 if (fr->cutoff_scheme == ecutsGROUP ||
2558 (ir->eConstrAlg == econtSHAKE &&
2559 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2560 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2562 fr->bMolPBC = ir->bPeriodicMols;
2567 if (getenv("GMX_USE_GRAPH") != NULL)
2569 fr->bMolPBC = FALSE;
2572 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2579 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2582 fr->bGB = (ir->implicit_solvent == eisGBSA);
2584 fr->rc_scaling = ir->refcoord_scaling;
2585 copy_rvec(ir->posres_com, fr->posres_com);
2586 copy_rvec(ir->posres_comB, fr->posres_comB);
2587 fr->rlist = cutoff_inf(ir->rlist);
2588 fr->rlistlong = cutoff_inf(ir->rlistlong);
2589 fr->eeltype = ir->coulombtype;
2590 fr->vdwtype = ir->vdwtype;
2591 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2593 fr->coulomb_modifier = ir->coulomb_modifier;
2594 fr->vdw_modifier = ir->vdw_modifier;
2596 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2597 switch (fr->eeltype)
2600 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2606 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2610 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2611 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2620 case eelPMEUSERSWITCH:
2621 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2626 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2630 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2634 /* Vdw: Translate from mdp settings to kernel format */
2635 switch (fr->vdwtype)
2640 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2644 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2648 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2654 case evdwENCADSHIFT:
2655 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2659 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2663 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2664 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2665 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2667 fr->rvdw = cutoff_inf(ir->rvdw);
2668 fr->rvdw_switch = ir->rvdw_switch;
2669 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2670 fr->rcoulomb_switch = ir->rcoulomb_switch;
2672 fr->bTwinRange = fr->rlistlong > fr->rlist;
2673 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2675 fr->reppow = mtop->ffparams.reppow;
2677 if (ir->cutoff_scheme == ecutsGROUP)
2679 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2680 && !EVDW_PME(fr->vdwtype));
2681 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2682 fr->bcoultab = !(fr->eeltype == eelCUT ||
2683 fr->eeltype == eelEWALD ||
2684 fr->eeltype == eelPME ||
2685 fr->eeltype == eelRF ||
2686 fr->eeltype == eelRF_ZERO);
2688 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2689 * going to be faster to tabulate the interaction than calling the generic kernel.
2690 * However, if generic kernels have been requested we keep things analytically.
2692 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2693 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2694 bGenericKernelOnly == FALSE)
2696 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2698 fr->bcoultab = TRUE;
2699 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2700 * which would otherwise need two tables.
2704 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2705 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2706 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2707 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2709 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2711 fr->bcoultab = TRUE;
2715 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2717 fr->bcoultab = TRUE;
2719 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2724 if (getenv("GMX_REQUIRE_TABLES"))
2727 fr->bcoultab = TRUE;
2732 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2733 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2736 if (fr->bvdwtab == TRUE)
2738 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2739 fr->nbkernel_vdw_modifier = eintmodNONE;
2741 if (fr->bcoultab == TRUE)
2743 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2744 fr->nbkernel_elec_modifier = eintmodNONE;
2748 if (ir->cutoff_scheme == ecutsVERLET)
2750 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2752 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2754 fr->bvdwtab = FALSE;
2755 fr->bcoultab = FALSE;
2758 /* Tables are used for direct ewald sum */
2761 if (EEL_PME(ir->coulombtype))
2765 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2767 if (ir->coulombtype == eelP3M_AD)
2769 please_cite(fp, "Hockney1988");
2770 please_cite(fp, "Ballenegger2012");
2774 please_cite(fp, "Essmann95a");
2777 if (ir->ewald_geometry == eewg3DC)
2781 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2783 please_cite(fp, "In-Chul99a");
2786 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2787 init_ewald_tab(&(fr->ewald_table), ir, fp);
2790 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2791 1/fr->ewaldcoeff_q);
2795 if (EVDW_PME(ir->vdwtype))
2799 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2801 please_cite(fp, "Essmann95a");
2802 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2805 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2806 1/fr->ewaldcoeff_lj);
2810 /* Electrostatics */
2811 fr->epsilon_r = ir->epsilon_r;
2812 fr->epsilon_rf = ir->epsilon_rf;
2813 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2815 /* Parameters for generalized RF */
2819 if (fr->eeltype == eelGRF)
2821 init_generalized_rf(fp, mtop, ir, fr);
2824 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2825 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2826 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2827 IR_ELEC_FIELD(*ir) ||
2828 (fr->adress_icor != eAdressICOff)
2831 if (fr->cutoff_scheme == ecutsGROUP &&
2832 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2834 /* Count the total number of charge groups */
2835 fr->cg_nalloc = ncg_mtop(mtop);
2836 srenew(fr->cg_cm, fr->cg_nalloc);
2838 if (fr->shift_vec == NULL)
2840 snew(fr->shift_vec, SHIFTS);
2843 if (fr->fshift == NULL)
2845 snew(fr->fshift, SHIFTS);
2848 if (fr->nbfp == NULL)
2850 fr->ntype = mtop->ffparams.atnr;
2851 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2852 if (EVDW_PME(fr->vdwtype))
2854 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2858 /* Copy the energy group exclusions */
2859 fr->egp_flags = ir->opts.egp_flags;
2861 /* Van der Waals stuff */
2862 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2864 if (fr->rvdw_switch >= fr->rvdw)
2866 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2867 fr->rvdw_switch, fr->rvdw);
2871 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2872 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2873 fr->rvdw_switch, fr->rvdw);
2877 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2879 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2882 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2884 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2887 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2889 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2894 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2895 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2898 fr->eDispCorr = ir->eDispCorr;
2899 if (ir->eDispCorr != edispcNO)
2901 set_avcsixtwelve(fp, fr, mtop);
2906 set_bham_b_max(fp, fr, mtop);
2909 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2911 /* Copy the GBSA data (radius, volume and surftens for each
2912 * atomtype) from the topology atomtype section to forcerec.
2914 snew(fr->atype_radius, fr->ntype);
2915 snew(fr->atype_vol, fr->ntype);
2916 snew(fr->atype_surftens, fr->ntype);
2917 snew(fr->atype_gb_radius, fr->ntype);
2918 snew(fr->atype_S_hct, fr->ntype);
2920 if (mtop->atomtypes.nr > 0)
2922 for (i = 0; i < fr->ntype; i++)
2924 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2926 for (i = 0; i < fr->ntype; i++)
2928 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2930 for (i = 0; i < fr->ntype; i++)
2932 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2934 for (i = 0; i < fr->ntype; i++)
2936 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2938 for (i = 0; i < fr->ntype; i++)
2940 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2944 /* Generate the GB table if needed */
2948 fr->gbtabscale = 2000;
2950 fr->gbtabscale = 500;
2954 fr->gbtab = make_gb_table(oenv, fr);
2956 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2958 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2959 if (!DOMAINDECOMP(cr))
2961 make_local_gb(cr, fr->born, ir->gb_algorithm);
2965 /* Set the charge scaling */
2966 if (fr->epsilon_r != 0)
2968 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2972 /* eps = 0 is infinite dieletric: no coulomb interactions */
2976 /* Reaction field constants */
2977 if (EEL_RF(fr->eeltype))
2979 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2980 fr->rcoulomb, fr->temp, fr->zsquare, box,
2981 &fr->kappa, &fr->k_rf, &fr->c_rf);
2984 /*This now calculates sum for q and c6*/
2985 set_chargesum(fp, fr, mtop);
2987 /* if we are using LR electrostatics, and they are tabulated,
2988 * the tables will contain modified coulomb interactions.
2989 * Since we want to use the non-shifted ones for 1-4
2990 * coulombic interactions, we must have an extra set of tables.
2993 /* Construct tables.
2994 * A little unnecessary to make both vdw and coul tables sometimes,
2995 * but what the heck... */
2997 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2998 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
3000 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
3001 fr->coulomb_modifier != eintmodNONE ||
3002 fr->vdw_modifier != eintmodNONE ||
3003 fr->bBHAM || fr->bEwald) &&
3004 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3005 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3006 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3008 negp_pp = ir->opts.ngener - ir->nwall;
3012 bSomeNormalNbListsAreInUse = TRUE;
3017 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3018 for (egi = 0; egi < negp_pp; egi++)
3020 for (egj = egi; egj < negp_pp; egj++)
3022 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3023 if (!(egp_flags & EGP_EXCL))
3025 if (egp_flags & EGP_TABLE)
3031 bSomeNormalNbListsAreInUse = TRUE;
3036 if (bSomeNormalNbListsAreInUse)
3038 fr->nnblists = negptable + 1;
3042 fr->nnblists = negptable;
3044 if (fr->nnblists > 1)
3046 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3055 snew(fr->nblists, fr->nnblists);
3057 /* This code automatically gives table length tabext without cut-off's,
3058 * in that case grompp should already have checked that we do not need
3059 * normal tables and we only generate tables for 1-4 interactions.
3061 rtab = ir->rlistlong + ir->tabext;
3065 /* make tables for ordinary interactions */
3066 if (bSomeNormalNbListsAreInUse)
3068 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3071 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3073 if (!bMakeSeparate14Table)
3075 fr->tab14 = fr->nblists[0].table_elec_vdw;
3085 /* Read the special tables for certain energy group pairs */
3086 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3087 for (egi = 0; egi < negp_pp; egi++)
3089 for (egj = egi; egj < negp_pp; egj++)
3091 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3092 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3094 nbl = &(fr->nblists[m]);
3095 if (fr->nnblists > 1)
3097 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3099 /* Read the table file with the two energy groups names appended */
3100 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3101 *mtop->groups.grpname[nm_ind[egi]],
3102 *mtop->groups.grpname[nm_ind[egj]],
3106 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3107 *mtop->groups.grpname[nm_ind[egi]],
3108 *mtop->groups.grpname[nm_ind[egj]],
3109 &fr->nblists[fr->nnblists/2+m]);
3113 else if (fr->nnblists > 1)
3115 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3121 else if ((fr->eDispCorr != edispcNO) &&
3122 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3123 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3124 (fr->vdw_modifier == eintmodPOTSHIFT)))
3126 /* Tables might not be used for the potential modifier interactions per se, but
3127 * we still need them to evaluate switch/shift dispersion corrections in this case.
3129 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3132 if (bMakeSeparate14Table)
3134 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3135 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3136 GMX_MAKETABLES_14ONLY);
3139 /* Read AdResS Thermo Force table if needed */
3140 if (fr->adress_icor == eAdressICThermoForce)
3142 /* old todo replace */
3144 if (ir->adress->n_tf_grps > 0)
3146 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3151 /* load the default table */
3152 snew(fr->atf_tabs, 1);
3153 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3158 fr->nwall = ir->nwall;
3159 if (ir->nwall && ir->wall_type == ewtTABLE)
3161 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3166 fcd->bondtab = make_bonded_tables(fp,
3167 F_TABBONDS, F_TABBONDSNC,
3169 fcd->angletab = make_bonded_tables(fp,
3172 fcd->dihtab = make_bonded_tables(fp,
3180 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3184 /* QM/MM initialization if requested
3188 fprintf(stderr, "QM/MM calculation requested.\n");
3191 fr->bQMMM = ir->bQMMM;
3192 fr->qr = mk_QMMMrec();
3194 /* Set all the static charge group info */
3195 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3197 &fr->bExcl_IntraCGAll_InterCGNone);
3198 if (DOMAINDECOMP(cr))
3204 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3207 if (!DOMAINDECOMP(cr))
3209 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3210 mtop->natoms, mtop->natoms, mtop->natoms);
3213 fr->print_force = print_force;
3216 /* coarse load balancing vars */
3221 /* Initialize neighbor search */
3222 init_ns(fp, cr, &fr->ns, fr, mtop);
3224 if (cr->duty & DUTY_PP)
3226 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3230 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3235 /* Initialize the thread working data for bonded interactions */
3236 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3238 snew(fr->excl_load, fr->nthreads+1);
3240 if (fr->cutoff_scheme == ecutsVERLET)
3242 if (ir->rcoulomb != ir->rvdw)
3244 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3247 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3250 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3251 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3253 if (ir->eDispCorr != edispcNO)
3255 calc_enervirdiff(fp, ir->eDispCorr, fr);
3259 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3260 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3261 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3263 void pr_forcerec(FILE *fp, t_forcerec *fr)
3267 pr_real(fp, fr->rlist);
3268 pr_real(fp, fr->rcoulomb);
3269 pr_real(fp, fr->fudgeQQ);
3270 pr_bool(fp, fr->bGrid);
3271 pr_bool(fp, fr->bTwinRange);
3272 /*pr_int(fp,fr->cg0);
3273 pr_int(fp,fr->hcg);*/
3274 for (i = 0; i < fr->nnblists; i++)
3276 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3278 pr_real(fp, fr->rcoulomb_switch);
3279 pr_real(fp, fr->rcoulomb);
3284 void forcerec_set_excl_load(t_forcerec *fr,
3285 const gmx_localtop_t *top)
3288 int t, i, j, ntot, n, ntarget;
3290 ind = top->excls.index;
3294 for (i = 0; i < top->excls.nr; i++)
3296 for (j = ind[i]; j < ind[i+1]; j++)
3305 fr->excl_load[0] = 0;
3308 for (t = 1; t <= fr->nthreads; t++)
3310 ntarget = (ntot*t)/fr->nthreads;
3311 while (i < top->excls.nr && n < ntarget)
3313 for (j = ind[i]; j < ind[i+1]; j++)
3322 fr->excl_load[t] = i;
3326 /* Frees GPU memory and destroys the CUDA context.
3328 * Note that this function needs to be called even if GPUs are not used
3329 * in this run because the PME ranks have no knowledge of whether GPUs
3330 * are used or not, but all ranks need to enter the barrier below.
3332 void free_gpu_resources(const t_forcerec *fr,
3333 const t_commrec *cr)
3335 gmx_bool bIsPPrankUsingGPU;
3336 char gpu_err_str[STRLEN];
3338 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3340 if (bIsPPrankUsingGPU)
3342 /* free nbnxn data in GPU memory */
3343 nbnxn_cuda_free(fr->nbv->cu_nbv);
3345 /* With tMPI we need to wait for all ranks to finish deallocation before
3346 * destroying the context in free_gpu() as some ranks may be sharing
3348 * Note: as only PP ranks need to free GPU resources, so it is safe to
3349 * not call the barrier on PME ranks.
3351 #ifdef GMX_THREAD_MPI
3356 #endif /* GMX_THREAD_MPI */
3358 /* uninitialize GPU (by destroying the context) */
3359 if (!free_gpu(gpu_err_str))
3361 gmx_warning("On rank %d failed to free GPU #%d: %s",
3362 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);