2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/legacyheaders/copyrite.h"
46 #include "gromacs/legacyheaders/coulomb.h"
47 #include "gromacs/legacyheaders/domdec.h"
48 #include "gromacs/legacyheaders/force.h"
49 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
50 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
51 #include "gromacs/legacyheaders/gpu_utils.h"
52 #include "gromacs/legacyheaders/inputrec.h"
53 #include "gromacs/legacyheaders/macros.h"
54 #include "gromacs/legacyheaders/md_logging.h"
55 #include "gromacs/legacyheaders/md_support.h"
56 #include "gromacs/legacyheaders/names.h"
57 #include "gromacs/legacyheaders/network.h"
58 #include "gromacs/legacyheaders/nonbonded.h"
59 #include "gromacs/legacyheaders/ns.h"
60 #include "gromacs/legacyheaders/pmalloc_cuda.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/tables.h"
63 #include "gromacs/legacyheaders/txtdump.h"
64 #include "gromacs/legacyheaders/typedefs.h"
65 #include "gromacs/legacyheaders/types/commrec.h"
66 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
67 #include "gromacs/math/units.h"
68 #include "gromacs/math/utilities.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/mdlib/nb_verlet.h"
71 #include "gromacs/mdlib/nbnxn_atomdata.h"
72 #include "gromacs/mdlib/nbnxn_consts.h"
73 #include "gromacs/mdlib/nbnxn_search.h"
74 #include "gromacs/mdlib/nbnxn_simd.h"
75 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/topology/mtop_util.h"
79 #include "gromacs/utility/fatalerror.h"
80 #include "gromacs/utility/smalloc.h"
82 t_forcerec *mk_forcerec(void)
92 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
96 for (i = 0; (i < atnr); i++)
98 for (j = 0; (j < atnr); j++)
100 fprintf(fp, "%2d - %2d", i, j);
103 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
104 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
108 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
109 C12(nbfp, atnr, i, j)/12.0);
116 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
124 snew(nbfp, 3*atnr*atnr);
125 for (i = k = 0; (i < atnr); i++)
127 for (j = 0; (j < atnr); j++, k++)
129 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
130 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
131 /* nbfp now includes the 6.0 derivative prefactor */
132 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
138 snew(nbfp, 2*atnr*atnr);
139 for (i = k = 0; (i < atnr); i++)
141 for (j = 0; (j < atnr); j++, k++)
143 /* nbfp now includes the 6.0/12.0 derivative prefactors */
144 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
145 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
153 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
156 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
159 /* For LJ-PME simulations, we correct the energies with the reciprocal space
160 * inside of the cut-off. To do this the non-bonded kernels needs to have
161 * access to the C6-values used on the reciprocal grid in pme.c
165 snew(grid, 2*atnr*atnr);
166 for (i = k = 0; (i < atnr); i++)
168 for (j = 0; (j < atnr); j++, k++)
170 c6i = idef->iparams[i*(atnr+1)].lj.c6;
171 c12i = idef->iparams[i*(atnr+1)].lj.c12;
172 c6j = idef->iparams[j*(atnr+1)].lj.c6;
173 c12j = idef->iparams[j*(atnr+1)].lj.c12;
174 c6 = sqrt(c6i * c6j);
175 if (fr->ljpme_combination_rule == eljpmeLB
176 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
178 sigmai = pow(c12i / c6i, 1.0/6.0);
179 sigmaj = pow(c12j / c6j, 1.0/6.0);
180 epsi = c6i * c6i / c12i;
181 epsj = c6j * c6j / c12j;
182 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
184 /* Store the elements at the same relative positions as C6 in nbfp in order
185 * to simplify access in the kernels
187 grid[2*(atnr*i+j)] = c6*6.0;
193 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
197 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
201 snew(nbfp, 2*atnr*atnr);
202 for (i = 0; i < atnr; ++i)
204 for (j = 0; j < atnr; ++j)
206 c6i = idef->iparams[i*(atnr+1)].lj.c6;
207 c12i = idef->iparams[i*(atnr+1)].lj.c12;
208 c6j = idef->iparams[j*(atnr+1)].lj.c6;
209 c12j = idef->iparams[j*(atnr+1)].lj.c12;
210 c6 = sqrt(c6i * c6j);
211 c12 = sqrt(c12i * c12j);
212 if (comb_rule == eCOMB_ARITHMETIC
213 && !gmx_numzero(c6) && !gmx_numzero(c12))
215 sigmai = pow(c12i / c6i, 1.0/6.0);
216 sigmaj = pow(c12j / c6j, 1.0/6.0);
217 epsi = c6i * c6i / c12i;
218 epsj = c6j * c6j / c12j;
219 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
220 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
222 C6(nbfp, atnr, i, j) = c6*6.0;
223 C12(nbfp, atnr, i, j) = c12*12.0;
229 /* This routine sets fr->solvent_opt to the most common solvent in the
230 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
231 * the fr->solvent_type array with the correct type (or esolNO).
233 * Charge groups that fulfill the conditions but are not identical to the
234 * most common one will be marked as esolNO in the solvent_type array.
236 * TIP3p is identical to SPC for these purposes, so we call it
237 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
239 * NOTE: QM particle should not
240 * become an optimized solvent. Not even if there is only one charge
250 } solvent_parameters_t;
253 check_solvent_cg(const gmx_moltype_t *molt,
256 const unsigned char *qm_grpnr,
257 const t_grps *qm_grps,
259 int *n_solvent_parameters,
260 solvent_parameters_t **solvent_parameters_p,
264 const t_blocka *excl;
271 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
272 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
275 solvent_parameters_t *solvent_parameters;
277 /* We use a list with parameters for each solvent type.
278 * Every time we discover a new molecule that fulfills the basic
279 * conditions for a solvent we compare with the previous entries
280 * in these lists. If the parameters are the same we just increment
281 * the counter for that type, and otherwise we create a new type
282 * based on the current molecule.
284 * Once we've finished going through all molecules we check which
285 * solvent is most common, and mark all those molecules while we
286 * clear the flag on all others.
289 solvent_parameters = *solvent_parameters_p;
291 /* Mark the cg first as non optimized */
294 /* Check if this cg has no exclusions with atoms in other charge groups
295 * and all atoms inside the charge group excluded.
296 * We only have 3 or 4 atom solvent loops.
298 if (GET_CGINFO_EXCL_INTER(cginfo) ||
299 !GET_CGINFO_EXCL_INTRA(cginfo))
304 /* Get the indices of the first atom in this charge group */
305 j0 = molt->cgs.index[cg0];
306 j1 = molt->cgs.index[cg0+1];
308 /* Number of atoms in our molecule */
314 "Moltype '%s': there are %d atoms in this charge group\n",
318 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
321 if (nj < 3 || nj > 4)
326 /* Check if we are doing QM on this group */
328 if (qm_grpnr != NULL)
330 for (j = j0; j < j1 && !qm; j++)
332 qm = (qm_grpnr[j] < qm_grps->nr - 1);
335 /* Cannot use solvent optimization with QM */
341 atom = molt->atoms.atom;
343 /* Still looks like a solvent, time to check parameters */
345 /* If it is perturbed (free energy) we can't use the solvent loops,
346 * so then we just skip to the next molecule.
350 for (j = j0; j < j1 && !perturbed; j++)
352 perturbed = PERTURBED(atom[j]);
360 /* Now it's only a question if the VdW and charge parameters
361 * are OK. Before doing the check we compare and see if they are
362 * identical to a possible previous solvent type.
363 * First we assign the current types and charges.
365 for (j = 0; j < nj; j++)
367 tmp_vdwtype[j] = atom[j0+j].type;
368 tmp_charge[j] = atom[j0+j].q;
371 /* Does it match any previous solvent type? */
372 for (k = 0; k < *n_solvent_parameters; k++)
377 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
378 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
379 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
384 /* Check that types & charges match for all atoms in molecule */
385 for (j = 0; j < nj && match == TRUE; j++)
387 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
391 if (tmp_charge[j] != solvent_parameters[k].charge[j])
398 /* Congratulations! We have a matched solvent.
399 * Flag it with this type for later processing.
402 solvent_parameters[k].count += nmol;
404 /* We are done with this charge group */
409 /* If we get here, we have a tentative new solvent type.
410 * Before we add it we must check that it fulfills the requirements
411 * of the solvent optimized loops. First determine which atoms have
414 for (j = 0; j < nj; j++)
417 tjA = tmp_vdwtype[j];
419 /* Go through all other tpes and see if any have non-zero
420 * VdW parameters when combined with this one.
422 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
424 /* We already checked that the atoms weren't perturbed,
425 * so we only need to check state A now.
429 has_vdw[j] = (has_vdw[j] ||
430 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
431 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
432 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
437 has_vdw[j] = (has_vdw[j] ||
438 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
439 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
444 /* Now we know all we need to make the final check and assignment. */
448 * For this we require thatn all atoms have charge,
449 * the charges on atom 2 & 3 should be the same, and only
450 * atom 1 might have VdW.
452 if (has_vdw[1] == FALSE &&
453 has_vdw[2] == FALSE &&
454 tmp_charge[0] != 0 &&
455 tmp_charge[1] != 0 &&
456 tmp_charge[2] == tmp_charge[1])
458 srenew(solvent_parameters, *n_solvent_parameters+1);
459 solvent_parameters[*n_solvent_parameters].model = esolSPC;
460 solvent_parameters[*n_solvent_parameters].count = nmol;
461 for (k = 0; k < 3; k++)
463 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
464 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
467 *cg_sp = *n_solvent_parameters;
468 (*n_solvent_parameters)++;
473 /* Or could it be a TIP4P?
474 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
475 * Only atom 1 mght have VdW.
477 if (has_vdw[1] == FALSE &&
478 has_vdw[2] == FALSE &&
479 has_vdw[3] == FALSE &&
480 tmp_charge[0] == 0 &&
481 tmp_charge[1] != 0 &&
482 tmp_charge[2] == tmp_charge[1] &&
485 srenew(solvent_parameters, *n_solvent_parameters+1);
486 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
487 solvent_parameters[*n_solvent_parameters].count = nmol;
488 for (k = 0; k < 4; k++)
490 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
491 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
494 *cg_sp = *n_solvent_parameters;
495 (*n_solvent_parameters)++;
499 *solvent_parameters_p = solvent_parameters;
503 check_solvent(FILE * fp,
504 const gmx_mtop_t * mtop,
506 cginfo_mb_t *cginfo_mb)
509 const t_block * mols;
510 const gmx_moltype_t *molt;
511 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
512 int n_solvent_parameters;
513 solvent_parameters_t *solvent_parameters;
519 fprintf(debug, "Going to determine what solvent types we have.\n");
524 n_solvent_parameters = 0;
525 solvent_parameters = NULL;
526 /* Allocate temporary array for solvent type */
527 snew(cg_sp, mtop->nmolblock);
531 for (mb = 0; mb < mtop->nmolblock; mb++)
533 molt = &mtop->moltype[mtop->molblock[mb].type];
535 /* Here we have to loop over all individual molecules
536 * because we need to check for QMMM particles.
538 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
539 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
540 nmol = mtop->molblock[mb].nmol/nmol_ch;
541 for (mol = 0; mol < nmol_ch; mol++)
544 am = mol*cgs->index[cgs->nr];
545 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
547 check_solvent_cg(molt, cg_mol, nmol,
548 mtop->groups.grpnr[egcQMMM] ?
549 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
550 &mtop->groups.grps[egcQMMM],
552 &n_solvent_parameters, &solvent_parameters,
553 cginfo_mb[mb].cginfo[cgm+cg_mol],
554 &cg_sp[mb][cgm+cg_mol]);
557 cg_offset += cgs->nr;
558 at_offset += cgs->index[cgs->nr];
561 /* Puh! We finished going through all charge groups.
562 * Now find the most common solvent model.
565 /* Most common solvent this far */
567 for (i = 0; i < n_solvent_parameters; i++)
570 solvent_parameters[i].count > solvent_parameters[bestsp].count)
578 bestsol = solvent_parameters[bestsp].model;
586 for (mb = 0; mb < mtop->nmolblock; mb++)
588 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
589 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
590 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
592 if (cg_sp[mb][i] == bestsp)
594 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
599 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
606 if (bestsol != esolNO && fp != NULL)
608 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
610 solvent_parameters[bestsp].count);
613 sfree(solvent_parameters);
614 fr->solvent_opt = bestsol;
618 acNONE = 0, acCONSTRAINT, acSETTLE
621 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
622 t_forcerec *fr, gmx_bool bNoSolvOpt,
623 gmx_bool *bFEP_NonBonded,
624 gmx_bool *bExcl_IntraCGAll_InterCGNone)
627 const t_blocka *excl;
628 const gmx_moltype_t *molt;
629 const gmx_molblock_t *molb;
630 cginfo_mb_t *cginfo_mb;
633 int cg_offset, a_offset, cgm, am;
634 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
638 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
640 ncg_tot = ncg_mtop(mtop);
641 snew(cginfo_mb, mtop->nmolblock);
643 snew(type_VDW, fr->ntype);
644 for (ai = 0; ai < fr->ntype; ai++)
646 type_VDW[ai] = FALSE;
647 for (j = 0; j < fr->ntype; j++)
649 type_VDW[ai] = type_VDW[ai] ||
651 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
652 C12(fr->nbfp, fr->ntype, ai, j) != 0;
656 *bFEP_NonBonded = FALSE;
657 *bExcl_IntraCGAll_InterCGNone = TRUE;
660 snew(bExcl, excl_nalloc);
663 for (mb = 0; mb < mtop->nmolblock; mb++)
665 molb = &mtop->molblock[mb];
666 molt = &mtop->moltype[molb->type];
670 /* Check if the cginfo is identical for all molecules in this block.
671 * If so, we only need an array of the size of one molecule.
672 * Otherwise we make an array of #mol times #cgs per molecule.
676 for (m = 0; m < molb->nmol; m++)
678 am = m*cgs->index[cgs->nr];
679 for (cg = 0; cg < cgs->nr; cg++)
682 a1 = cgs->index[cg+1];
683 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
684 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
688 if (mtop->groups.grpnr[egcQMMM] != NULL)
690 for (ai = a0; ai < a1; ai++)
692 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
693 mtop->groups.grpnr[egcQMMM][a_offset +ai])
702 cginfo_mb[mb].cg_start = cg_offset;
703 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
704 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
705 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
706 cginfo = cginfo_mb[mb].cginfo;
708 /* Set constraints flags for constrained atoms */
709 snew(a_con, molt->atoms.nr);
710 for (ftype = 0; ftype < F_NRE; ftype++)
712 if (interaction_function[ftype].flags & IF_CONSTRAINT)
717 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
721 for (a = 0; a < nral; a++)
723 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
724 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
730 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
733 am = m*cgs->index[cgs->nr];
734 for (cg = 0; cg < cgs->nr; cg++)
737 a1 = cgs->index[cg+1];
739 /* Store the energy group in cginfo */
740 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
741 SET_CGINFO_GID(cginfo[cgm+cg], gid);
743 /* Check the intra/inter charge group exclusions */
744 if (a1-a0 > excl_nalloc)
746 excl_nalloc = a1 - a0;
747 srenew(bExcl, excl_nalloc);
749 /* bExclIntraAll: all intra cg interactions excluded
750 * bExclInter: any inter cg interactions excluded
752 bExclIntraAll = TRUE;
756 bHavePerturbedAtoms = FALSE;
757 for (ai = a0; ai < a1; ai++)
759 /* Check VDW and electrostatic interactions */
760 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
761 type_VDW[molt->atoms.atom[ai].typeB]);
762 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
763 molt->atoms.atom[ai].qB != 0);
765 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
767 /* Clear the exclusion list for atom ai */
768 for (aj = a0; aj < a1; aj++)
770 bExcl[aj-a0] = FALSE;
772 /* Loop over all the exclusions of atom ai */
773 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
776 if (aj < a0 || aj >= a1)
785 /* Check if ai excludes a0 to a1 */
786 for (aj = a0; aj < a1; aj++)
790 bExclIntraAll = FALSE;
797 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
800 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
808 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
812 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
814 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
816 /* The size in cginfo is currently only read with DD */
817 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
821 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
825 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
827 if (bHavePerturbedAtoms && fr->efep != efepNO)
829 SET_CGINFO_FEP(cginfo[cgm+cg]);
830 *bFEP_NonBonded = TRUE;
832 /* Store the charge group size */
833 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
835 if (!bExclIntraAll || bExclInter)
837 *bExcl_IntraCGAll_InterCGNone = FALSE;
844 cg_offset += molb->nmol*cgs->nr;
845 a_offset += molb->nmol*cgs->index[cgs->nr];
849 /* the solvent optimizer is called after the QM is initialized,
850 * because we don't want to have the QM subsystemto become an
854 check_solvent(fplog, mtop, fr, cginfo_mb);
856 if (getenv("GMX_NO_SOLV_OPT"))
860 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
861 "Disabling all solvent optimization\n");
863 fr->solvent_opt = esolNO;
867 fr->solvent_opt = esolNO;
869 if (!fr->solvent_opt)
871 for (mb = 0; mb < mtop->nmolblock; mb++)
873 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
875 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
883 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
888 ncg = cgi_mb[nmb-1].cg_end;
891 for (cg = 0; cg < ncg; cg++)
893 while (cg >= cgi_mb[mb].cg_end)
898 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
904 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
906 /*This now calculates sum for q and c6*/
907 double qsum, q2sum, q, c6sum, c6;
909 const t_atoms *atoms;
914 for (mb = 0; mb < mtop->nmolblock; mb++)
916 nmol = mtop->molblock[mb].nmol;
917 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
918 for (i = 0; i < atoms->nr; i++)
920 q = atoms->atom[i].q;
923 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
928 fr->q2sum[0] = q2sum;
929 fr->c6sum[0] = c6sum;
931 if (fr->efep != efepNO)
936 for (mb = 0; mb < mtop->nmolblock; mb++)
938 nmol = mtop->molblock[mb].nmol;
939 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
940 for (i = 0; i < atoms->nr; i++)
942 q = atoms->atom[i].qB;
945 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
949 fr->q2sum[1] = q2sum;
950 fr->c6sum[1] = c6sum;
955 fr->qsum[1] = fr->qsum[0];
956 fr->q2sum[1] = fr->q2sum[0];
957 fr->c6sum[1] = fr->c6sum[0];
961 if (fr->efep == efepNO)
963 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
967 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
968 fr->qsum[0], fr->qsum[1]);
973 void update_forcerec(t_forcerec *fr, matrix box)
975 if (fr->eeltype == eelGRF)
977 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
978 fr->rcoulomb, fr->temp, fr->zsquare, box,
979 &fr->kappa, &fr->k_rf, &fr->c_rf);
983 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
985 const t_atoms *atoms, *atoms_tpi;
986 const t_blocka *excl;
987 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
988 gmx_int64_t npair, npair_ij, tmpi, tmpj;
989 double csix, ctwelve;
993 real *nbfp_comb = NULL;
999 /* For LJ-PME, we want to correct for the difference between the
1000 * actual C6 values and the C6 values used by the LJ-PME based on
1001 * combination rules. */
1003 if (EVDW_PME(fr->vdwtype))
1005 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1006 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1007 for (tpi = 0; tpi < ntp; ++tpi)
1009 for (tpj = 0; tpj < ntp; ++tpj)
1011 C6(nbfp_comb, ntp, tpi, tpj) =
1012 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1013 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1018 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1026 /* Count the types so we avoid natoms^2 operations */
1027 snew(typecount, ntp);
1028 gmx_mtop_count_atomtypes(mtop, q, typecount);
1030 for (tpi = 0; tpi < ntp; tpi++)
1032 for (tpj = tpi; tpj < ntp; tpj++)
1034 tmpi = typecount[tpi];
1035 tmpj = typecount[tpj];
1038 npair_ij = tmpi*tmpj;
1042 npair_ij = tmpi*(tmpi - 1)/2;
1046 /* nbfp now includes the 6.0 derivative prefactor */
1047 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1051 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1052 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1053 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1059 /* Subtract the excluded pairs.
1060 * The main reason for substracting exclusions is that in some cases
1061 * some combinations might never occur and the parameters could have
1062 * any value. These unused values should not influence the dispersion
1065 for (mb = 0; mb < mtop->nmolblock; mb++)
1067 nmol = mtop->molblock[mb].nmol;
1068 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1069 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1070 for (i = 0; (i < atoms->nr); i++)
1074 tpi = atoms->atom[i].type;
1078 tpi = atoms->atom[i].typeB;
1080 j1 = excl->index[i];
1081 j2 = excl->index[i+1];
1082 for (j = j1; j < j2; j++)
1089 tpj = atoms->atom[k].type;
1093 tpj = atoms->atom[k].typeB;
1097 /* nbfp now includes the 6.0 derivative prefactor */
1098 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1102 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1103 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1104 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1114 /* Only correct for the interaction of the test particle
1115 * with the rest of the system.
1118 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1121 for (mb = 0; mb < mtop->nmolblock; mb++)
1123 nmol = mtop->molblock[mb].nmol;
1124 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1125 for (j = 0; j < atoms->nr; j++)
1128 /* Remove the interaction of the test charge group
1131 if (mb == mtop->nmolblock-1)
1135 if (mb == 0 && nmol == 1)
1137 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1142 tpj = atoms->atom[j].type;
1146 tpj = atoms->atom[j].typeB;
1148 for (i = 0; i < fr->n_tpi; i++)
1152 tpi = atoms_tpi->atom[i].type;
1156 tpi = atoms_tpi->atom[i].typeB;
1160 /* nbfp now includes the 6.0 derivative prefactor */
1161 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1165 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1166 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1167 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1174 if (npair - nexcl <= 0 && fplog)
1176 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1182 csix /= npair - nexcl;
1183 ctwelve /= npair - nexcl;
1187 fprintf(debug, "Counted %d exclusions\n", nexcl);
1188 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1189 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1191 fr->avcsix[q] = csix;
1192 fr->avctwelve[q] = ctwelve;
1195 if (EVDW_PME(fr->vdwtype))
1202 if (fr->eDispCorr == edispcAllEner ||
1203 fr->eDispCorr == edispcAllEnerPres)
1205 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1206 fr->avcsix[0], fr->avctwelve[0]);
1210 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1216 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1217 const gmx_mtop_t *mtop)
1219 const t_atoms *at1, *at2;
1220 int mt1, mt2, i, j, tpi, tpj, ntypes;
1226 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1233 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1235 at1 = &mtop->moltype[mt1].atoms;
1236 for (i = 0; (i < at1->nr); i++)
1238 tpi = at1->atom[i].type;
1241 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1244 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1246 at2 = &mtop->moltype[mt2].atoms;
1247 for (j = 0; (j < at2->nr); j++)
1249 tpj = at2->atom[j].type;
1252 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1254 b = BHAMB(nbfp, ntypes, tpi, tpj);
1255 if (b > fr->bham_b_max)
1259 if ((b < bmin) || (bmin == -1))
1269 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1270 bmin, fr->bham_b_max);
1274 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1275 t_forcerec *fr, real rtab,
1276 const t_commrec *cr,
1277 const char *tabfn, char *eg1, char *eg2,
1287 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1292 sprintf(buf, "%s", tabfn);
1295 /* Append the two energy group names */
1296 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1297 eg1, eg2, ftp2ext(efXVG));
1299 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1300 /* Copy the contents of the table to separate coulomb and LJ tables too,
1301 * to improve cache performance.
1303 /* For performance reasons we want
1304 * the table data to be aligned to 16-byte. The pointers could be freed
1305 * but currently aren't.
1307 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1308 nbl->table_elec.format = nbl->table_elec_vdw.format;
1309 nbl->table_elec.r = nbl->table_elec_vdw.r;
1310 nbl->table_elec.n = nbl->table_elec_vdw.n;
1311 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1312 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1313 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1314 nbl->table_elec.ninteractions = 1;
1315 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1316 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1318 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1319 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1320 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1321 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1322 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1323 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1324 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1325 nbl->table_vdw.ninteractions = 2;
1326 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1327 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1329 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1331 for (j = 0; j < 4; j++)
1333 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1335 for (j = 0; j < 8; j++)
1337 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1342 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1343 int *ncount, int **count)
1345 const gmx_moltype_t *molt;
1347 int mt, ftype, stride, i, j, tabnr;
1349 for (mt = 0; mt < mtop->nmoltype; mt++)
1351 molt = &mtop->moltype[mt];
1352 for (ftype = 0; ftype < F_NRE; ftype++)
1354 if (ftype == ftype1 || ftype == ftype2)
1356 il = &molt->ilist[ftype];
1357 stride = 1 + NRAL(ftype);
1358 for (i = 0; i < il->nr; i += stride)
1360 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1363 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1365 if (tabnr >= *ncount)
1367 srenew(*count, tabnr+1);
1368 for (j = *ncount; j < tabnr+1; j++)
1381 static bondedtable_t *make_bonded_tables(FILE *fplog,
1382 int ftype1, int ftype2,
1383 const gmx_mtop_t *mtop,
1384 const char *basefn, const char *tabext)
1386 int i, ncount, *count;
1394 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1399 for (i = 0; i < ncount; i++)
1403 sprintf(tabfn, "%s", basefn);
1404 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1405 tabext, i, ftp2ext(efXVG));
1406 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1415 void forcerec_set_ranges(t_forcerec *fr,
1416 int ncg_home, int ncg_force,
1418 int natoms_force_constr, int natoms_f_novirsum)
1423 /* fr->ncg_force is unused in the standard code,
1424 * but it can be useful for modified code dealing with charge groups.
1426 fr->ncg_force = ncg_force;
1427 fr->natoms_force = natoms_force;
1428 fr->natoms_force_constr = natoms_force_constr;
1430 if (fr->natoms_force_constr > fr->nalloc_force)
1432 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1436 srenew(fr->f_twin, fr->nalloc_force);
1440 if (fr->bF_NoVirSum)
1442 fr->f_novirsum_n = natoms_f_novirsum;
1443 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1445 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1446 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1451 fr->f_novirsum_n = 0;
1455 static real cutoff_inf(real cutoff)
1459 cutoff = GMX_CUTOFF_INF;
1465 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1466 t_forcerec *fr, const t_inputrec *ir,
1467 const char *tabfn, const gmx_mtop_t *mtop,
1475 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1479 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1481 sprintf(buf, "%s", tabfn);
1482 for (i = 0; i < ir->adress->n_tf_grps; i++)
1484 j = ir->adress->tf_table_index[i]; /* get energy group index */
1485 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1486 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1489 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1491 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1496 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1503 ir->rcoulomb == 0 &&
1505 ir->ePBC == epbcNONE &&
1506 ir->vdwtype == evdwCUT &&
1507 ir->coulombtype == eelCUT &&
1508 ir->efep == efepNO &&
1509 (ir->implicit_solvent == eisNO ||
1510 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1511 ir->gb_algorithm == egbHCT ||
1512 ir->gb_algorithm == egbOBC))) &&
1513 getenv("GMX_NO_ALLVSALL") == NULL
1516 if (bAllvsAll && ir->opts.ngener > 1)
1518 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1524 fprintf(stderr, "\n%s\n", note);
1528 fprintf(fp, "\n%s\n", note);
1534 if (bAllvsAll && fp && MASTER(cr))
1536 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1543 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1547 /* These thread local data structures are used for bondeds only */
1548 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1550 if (fr->nthreads > 1)
1552 snew(fr->f_t, fr->nthreads);
1553 /* Thread 0 uses the global force and energy arrays */
1554 for (t = 1; t < fr->nthreads; t++)
1556 fr->f_t[t].f = NULL;
1557 fr->f_t[t].f_nalloc = 0;
1558 snew(fr->f_t[t].fshift, SHIFTS);
1559 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1560 for (i = 0; i < egNR; i++)
1562 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1569 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1570 const t_commrec *cr,
1571 const t_inputrec *ir,
1574 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1576 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1577 bGPU ? "GPUs" : "SIMD kernels",
1578 bGPU ? "CPU only" : "plain-C kernels");
1586 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1590 *kernel_type = nbnxnk4x4_PlainC;
1591 *ewald_excl = ewaldexclTable;
1593 #ifdef GMX_NBNXN_SIMD
1595 #ifdef GMX_NBNXN_SIMD_4XN
1596 *kernel_type = nbnxnk4xN_SIMD_4xN;
1598 #ifdef GMX_NBNXN_SIMD_2XNN
1599 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1602 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1603 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1604 * Currently this is based on the SIMD acceleration choice,
1605 * but it might be better to decide this at runtime based on CPU.
1607 * 4xN calculates more (zero) interactions, but has less pair-search
1608 * work and much better kernel instruction scheduling.
1610 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1611 * which doesn't have FMA, both the analytical and tabulated Ewald
1612 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1613 * 2x(4+4) because it results in significantly fewer pairs.
1614 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1615 * 10% with HT, 50% without HT. As we currently don't detect the actual
1616 * use of HT, use 4x8 to avoid a potential performance hit.
1617 * On Intel Haswell 4x8 is always faster.
1619 *kernel_type = nbnxnk4xN_SIMD_4xN;
1621 #ifndef GMX_SIMD_HAVE_FMA
1622 if (EEL_PME_EWALD(ir->coulombtype) ||
1623 EVDW_PME(ir->vdwtype))
1625 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1626 * There are enough instructions to make 2x(4+4) efficient.
1628 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1631 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1634 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1636 #ifdef GMX_NBNXN_SIMD_4XN
1637 *kernel_type = nbnxnk4xN_SIMD_4xN;
1639 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1642 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1644 #ifdef GMX_NBNXN_SIMD_2XNN
1645 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1647 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1651 /* Analytical Ewald exclusion correction is only an option in
1653 * Since table lookup's don't parallelize with SIMD, analytical
1654 * will probably always be faster for a SIMD width of 8 or more.
1655 * With FMA analytical is sometimes faster for a width if 4 as well.
1656 * On BlueGene/Q, this is faster regardless of precision.
1657 * In single precision, this is faster on Bulldozer.
1659 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1660 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1661 defined GMX_SIMD_IBM_QPX
1662 *ewald_excl = ewaldexclAnalytical;
1664 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1666 *ewald_excl = ewaldexclTable;
1668 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1670 *ewald_excl = ewaldexclAnalytical;
1674 #endif /* GMX_NBNXN_SIMD */
1678 const char *lookup_nbnxn_kernel_name(int kernel_type)
1680 const char *returnvalue = NULL;
1681 switch (kernel_type)
1684 returnvalue = "not set";
1686 case nbnxnk4x4_PlainC:
1687 returnvalue = "plain C";
1689 case nbnxnk4xN_SIMD_4xN:
1690 case nbnxnk4xN_SIMD_2xNN:
1691 #ifdef GMX_NBNXN_SIMD
1692 #if defined GMX_SIMD_X86_SSE2
1693 returnvalue = "SSE2";
1694 #elif defined GMX_SIMD_X86_SSE4_1
1695 returnvalue = "SSE4.1";
1696 #elif defined GMX_SIMD_X86_AVX_128_FMA
1697 returnvalue = "AVX_128_FMA";
1698 #elif defined GMX_SIMD_X86_AVX_256
1699 returnvalue = "AVX_256";
1700 #elif defined GMX_SIMD_X86_AVX2_256
1701 returnvalue = "AVX2_256";
1703 returnvalue = "SIMD";
1705 #else /* GMX_NBNXN_SIMD */
1706 returnvalue = "not available";
1707 #endif /* GMX_NBNXN_SIMD */
1709 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1710 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1714 gmx_fatal(FARGS, "Illegal kernel type selected");
1721 static void pick_nbnxn_kernel(FILE *fp,
1722 const t_commrec *cr,
1723 gmx_bool use_simd_kernels,
1725 gmx_bool bEmulateGPU,
1726 const t_inputrec *ir,
1729 gmx_bool bDoNonbonded)
1731 assert(kernel_type);
1733 *kernel_type = nbnxnkNotSet;
1734 *ewald_excl = ewaldexclTable;
1738 *kernel_type = nbnxnk8x8x8_PlainC;
1742 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1747 *kernel_type = nbnxnk8x8x8_CUDA;
1750 if (*kernel_type == nbnxnkNotSet)
1752 /* LJ PME with LB combination rule does 7 mesh operations.
1753 * This so slow that we don't compile SIMD non-bonded kernels for that.
1755 if (use_simd_kernels &&
1756 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1758 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1762 *kernel_type = nbnxnk4x4_PlainC;
1766 if (bDoNonbonded && fp != NULL)
1768 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1769 lookup_nbnxn_kernel_name(*kernel_type),
1770 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1771 nbnxn_kernel_to_cj_size(*kernel_type));
1773 if (nbnxnk4x4_PlainC == *kernel_type ||
1774 nbnxnk8x8x8_PlainC == *kernel_type)
1776 md_print_warn(cr, fp,
1777 "WARNING: Using the slow %s kernels. This should\n"
1778 "not happen during routine usage on supported platforms.\n\n",
1779 lookup_nbnxn_kernel_name(*kernel_type));
1784 static void pick_nbnxn_resources(const t_commrec *cr,
1785 const gmx_hw_info_t *hwinfo,
1786 gmx_bool bDoNonbonded,
1788 gmx_bool *bEmulateGPU,
1789 const gmx_gpu_opt_t *gpu_opt)
1791 gmx_bool bEmulateGPUEnvVarSet;
1792 char gpu_err_str[STRLEN];
1796 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1798 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1799 * GPUs (currently) only handle non-bonded calculations, we will
1800 * automatically switch to emulation if non-bonded calculations are
1801 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1802 * way to turn off GPU initialization, data movement, and cleanup.
1804 * GPU emulation can be useful to assess the performance one can expect by
1805 * adding GPU(s) to the machine. The conditional below allows this even
1806 * if mdrun is compiled without GPU acceleration support.
1807 * Note that you should freezing the system as otherwise it will explode.
1809 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1811 gpu_opt->ncuda_dev_use > 0));
1813 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1815 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1817 /* Each PP node will use the intra-node id-th device from the
1818 * list of detected/selected GPUs. */
1819 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1820 &hwinfo->gpu_info, gpu_opt))
1822 /* At this point the init should never fail as we made sure that
1823 * we have all the GPUs we need. If it still does, we'll bail. */
1824 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1826 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1827 cr->rank_pp_intranode),
1831 /* Here we actually turn on hardware GPU acceleration */
1836 gmx_bool uses_simple_tables(int cutoff_scheme,
1837 nonbonded_verlet_t *nbv,
1840 gmx_bool bUsesSimpleTables = TRUE;
1843 switch (cutoff_scheme)
1846 bUsesSimpleTables = TRUE;
1849 assert(NULL != nbv && NULL != nbv->grp);
1850 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1851 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1854 gmx_incons("unimplemented");
1856 return bUsesSimpleTables;
1859 static void init_ewald_f_table(interaction_const_t *ic,
1860 gmx_bool bUsesSimpleTables,
1865 if (bUsesSimpleTables)
1867 /* Get the Ewald table spacing based on Coulomb and/or LJ
1868 * Ewald coefficients and rtol.
1870 ic->tabq_scale = ewald_spline3_table_scale(ic);
1872 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1873 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1877 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1878 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1879 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1882 sfree_aligned(ic->tabq_coul_FDV0);
1883 sfree_aligned(ic->tabq_coul_F);
1884 sfree_aligned(ic->tabq_coul_V);
1886 sfree_aligned(ic->tabq_vdw_FDV0);
1887 sfree_aligned(ic->tabq_vdw_F);
1888 sfree_aligned(ic->tabq_vdw_V);
1890 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1892 /* Create the original table data in FDV0 */
1893 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1894 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1895 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1896 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1897 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1900 if (EVDW_PME(ic->vdwtype))
1902 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1903 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1904 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1905 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1906 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1910 void init_interaction_const_tables(FILE *fp,
1911 interaction_const_t *ic,
1912 gmx_bool bUsesSimpleTables,
1917 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1919 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1923 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1924 1/ic->tabq_scale, ic->tabq_size);
1929 static void clear_force_switch_constants(shift_consts_t *sc)
1936 static void force_switch_constants(real p,
1940 /* Here we determine the coefficient for shifting the force to zero
1941 * between distance rsw and the cut-off rc.
1942 * For a potential of r^-p, we have force p*r^-(p+1).
1943 * But to save flops we absorb p in the coefficient.
1945 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1946 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1948 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1949 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1950 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1953 static void potential_switch_constants(real rsw, real rc,
1954 switch_consts_t *sc)
1956 /* The switch function is 1 at rsw and 0 at rc.
1957 * The derivative and second derivate are zero at both ends.
1958 * rsw = max(r - r_switch, 0)
1959 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1960 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1961 * force = force*dsw - potential*sw
1964 sc->c3 = -10*pow(rc - rsw, -3);
1965 sc->c4 = 15*pow(rc - rsw, -4);
1966 sc->c5 = -6*pow(rc - rsw, -5);
1970 init_interaction_const(FILE *fp,
1971 const t_commrec gmx_unused *cr,
1972 interaction_const_t **interaction_const,
1973 const t_forcerec *fr,
1976 interaction_const_t *ic;
1977 gmx_bool bUsesSimpleTables = TRUE;
1981 /* Just allocate something so we can free it */
1982 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1983 snew_aligned(ic->tabq_coul_F, 16, 32);
1984 snew_aligned(ic->tabq_coul_V, 16, 32);
1986 ic->rlist = fr->rlist;
1987 ic->rlistlong = fr->rlistlong;
1990 ic->vdwtype = fr->vdwtype;
1991 ic->vdw_modifier = fr->vdw_modifier;
1992 ic->rvdw = fr->rvdw;
1993 ic->rvdw_switch = fr->rvdw_switch;
1994 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1995 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1996 ic->sh_lj_ewald = 0;
1997 clear_force_switch_constants(&ic->dispersion_shift);
1998 clear_force_switch_constants(&ic->repulsion_shift);
2000 switch (ic->vdw_modifier)
2002 case eintmodPOTSHIFT:
2003 /* Only shift the potential, don't touch the force */
2004 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2005 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2006 if (EVDW_PME(ic->vdwtype))
2010 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2011 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2014 case eintmodFORCESWITCH:
2015 /* Switch the force, switch and shift the potential */
2016 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2017 &ic->dispersion_shift);
2018 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2019 &ic->repulsion_shift);
2021 case eintmodPOTSWITCH:
2022 /* Switch the potential and force */
2023 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2027 case eintmodEXACTCUTOFF:
2028 /* Nothing to do here */
2031 gmx_incons("unimplemented potential modifier");
2034 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2036 /* Electrostatics */
2037 ic->eeltype = fr->eeltype;
2038 ic->coulomb_modifier = fr->coulomb_modifier;
2039 ic->rcoulomb = fr->rcoulomb;
2040 ic->epsilon_r = fr->epsilon_r;
2041 ic->epsfac = fr->epsfac;
2042 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2044 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2046 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2053 /* Reaction-field */
2054 if (EEL_RF(ic->eeltype))
2056 ic->epsilon_rf = fr->epsilon_rf;
2057 ic->k_rf = fr->k_rf;
2058 ic->c_rf = fr->c_rf;
2062 /* For plain cut-off we might use the reaction-field kernels */
2063 ic->epsilon_rf = ic->epsilon_r;
2065 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2067 ic->c_rf = 1/ic->rcoulomb;
2077 real dispersion_shift;
2079 dispersion_shift = ic->dispersion_shift.cpot;
2080 if (EVDW_PME(ic->vdwtype))
2082 dispersion_shift -= ic->sh_lj_ewald;
2084 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2085 ic->repulsion_shift.cpot, dispersion_shift);
2087 if (ic->eeltype == eelCUT)
2089 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2091 else if (EEL_PME(ic->eeltype))
2093 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2098 *interaction_const = ic;
2100 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2102 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2104 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2105 * also sharing texture references. To keep the code simple, we don't
2106 * treat texture references as shared resources, but this means that
2107 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2108 * Hence, to ensure that the non-bonded kernels don't start before all
2109 * texture binding operations are finished, we need to wait for all ranks
2110 * to arrive here before continuing.
2112 * Note that we could omit this barrier if GPUs are not shared (or
2113 * texture objects are used), but as this is initialization code, there
2114 * is not point in complicating things.
2116 #ifdef GMX_THREAD_MPI
2121 #endif /* GMX_THREAD_MPI */
2124 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2125 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2128 static void init_nb_verlet(FILE *fp,
2129 nonbonded_verlet_t **nb_verlet,
2130 gmx_bool bFEP_NonBonded,
2131 const t_inputrec *ir,
2132 const t_forcerec *fr,
2133 const t_commrec *cr,
2134 const char *nbpu_opt)
2136 nonbonded_verlet_t *nbv;
2139 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2141 nbnxn_alloc_t *nb_alloc;
2142 nbnxn_free_t *nb_free;
2146 pick_nbnxn_resources(cr, fr->hwinfo,
2154 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2155 for (i = 0; i < nbv->ngrp; i++)
2157 nbv->grp[i].nbl_lists.nnbl = 0;
2158 nbv->grp[i].nbat = NULL;
2159 nbv->grp[i].kernel_type = nbnxnkNotSet;
2161 if (i == 0) /* local */
2163 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2164 nbv->bUseGPU, bEmulateGPU, ir,
2165 &nbv->grp[i].kernel_type,
2166 &nbv->grp[i].ewald_excl,
2169 else /* non-local */
2171 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2173 /* Use GPU for local, select a CPU kernel for non-local */
2174 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2176 &nbv->grp[i].kernel_type,
2177 &nbv->grp[i].ewald_excl,
2180 bHybridGPURun = TRUE;
2184 /* Use the same kernel for local and non-local interactions */
2185 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2186 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2193 /* init the NxN GPU data; the last argument tells whether we'll have
2194 * both local and non-local NB calculation on GPU */
2195 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2196 &fr->hwinfo->gpu_info, fr->gpu_opt,
2197 cr->rank_pp_intranode,
2198 (nbv->ngrp > 1) && !bHybridGPURun);
2200 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2204 nbv->min_ci_balanced = strtol(env, &end, 10);
2205 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2207 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2212 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2213 nbv->min_ci_balanced);
2218 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2221 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2222 nbv->min_ci_balanced);
2228 nbv->min_ci_balanced = 0;
2233 nbnxn_init_search(&nbv->nbs,
2234 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2235 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2237 gmx_omp_nthreads_get(emntPairsearch));
2239 for (i = 0; i < nbv->ngrp; i++)
2241 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2243 nb_alloc = &pmalloc;
2252 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2253 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2254 /* 8x8x8 "non-simple" lists are ATM always combined */
2255 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2259 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2261 gmx_bool bSimpleList;
2262 int enbnxninitcombrule;
2264 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2266 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2268 /* Plain LJ cut-off: we can optimize with combination rules */
2269 enbnxninitcombrule = enbnxninitcombruleDETECT;
2271 else if (fr->vdwtype == evdwPME)
2273 /* LJ-PME: we need to use a combination rule for the grid */
2274 if (fr->ljpme_combination_rule == eljpmeGEOM)
2276 enbnxninitcombrule = enbnxninitcombruleGEOM;
2280 enbnxninitcombrule = enbnxninitcombruleLB;
2285 /* We use a full combination matrix: no rule required */
2286 enbnxninitcombrule = enbnxninitcombruleNONE;
2290 snew(nbv->grp[i].nbat, 1);
2291 nbnxn_atomdata_init(fp,
2293 nbv->grp[i].kernel_type,
2295 fr->ntype, fr->nbfp,
2297 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2302 nbv->grp[i].nbat = nbv->grp[0].nbat;
2307 void init_forcerec(FILE *fp,
2308 const output_env_t oenv,
2311 const t_inputrec *ir,
2312 const gmx_mtop_t *mtop,
2313 const t_commrec *cr,
2319 const char *nbpu_opt,
2320 gmx_bool bNoSolvOpt,
2323 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2328 gmx_bool bGenericKernelOnly;
2329 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2330 gmx_bool bFEP_NonBonded;
2332 int *nm_ind, egp_flags;
2334 if (fr->hwinfo == NULL)
2336 /* Detect hardware, gather information.
2337 * In mdrun, hwinfo has already been set before calling init_forcerec.
2338 * Here we ignore GPUs, as tools will not use them anyhow.
2340 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2343 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2344 fr->use_simd_kernels = TRUE;
2346 fr->bDomDec = DOMAINDECOMP(cr);
2348 natoms = mtop->natoms;
2350 if (check_box(ir->ePBC, box))
2352 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2355 /* Test particle insertion ? */
2358 /* Set to the size of the molecule to be inserted (the last one) */
2359 /* Because of old style topologies, we have to use the last cg
2360 * instead of the last molecule type.
2362 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2363 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2364 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2366 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2374 /* Copy AdResS parameters */
2377 fr->adress_type = ir->adress->type;
2378 fr->adress_const_wf = ir->adress->const_wf;
2379 fr->adress_ex_width = ir->adress->ex_width;
2380 fr->adress_hy_width = ir->adress->hy_width;
2381 fr->adress_icor = ir->adress->icor;
2382 fr->adress_site = ir->adress->site;
2383 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2384 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2387 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2388 for (i = 0; i < ir->adress->n_energy_grps; i++)
2390 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2393 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2394 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2395 for (i = 0; i < fr->n_adress_tf_grps; i++)
2397 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2399 copy_rvec(ir->adress->refs, fr->adress_refs);
2403 fr->adress_type = eAdressOff;
2404 fr->adress_do_hybridpairs = FALSE;
2407 /* Copy the user determined parameters */
2408 fr->userint1 = ir->userint1;
2409 fr->userint2 = ir->userint2;
2410 fr->userint3 = ir->userint3;
2411 fr->userint4 = ir->userint4;
2412 fr->userreal1 = ir->userreal1;
2413 fr->userreal2 = ir->userreal2;
2414 fr->userreal3 = ir->userreal3;
2415 fr->userreal4 = ir->userreal4;
2418 fr->fc_stepsize = ir->fc_stepsize;
2421 fr->efep = ir->efep;
2422 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2423 if (ir->fepvals->bScCoul)
2425 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2426 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2430 fr->sc_alphacoul = 0;
2431 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2433 fr->sc_power = ir->fepvals->sc_power;
2434 fr->sc_r_power = ir->fepvals->sc_r_power;
2435 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2437 env = getenv("GMX_SCSIGMA_MIN");
2441 sscanf(env, "%lf", &dbl);
2442 fr->sc_sigma6_min = pow(dbl, 6);
2445 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2449 fr->bNonbonded = TRUE;
2450 if (getenv("GMX_NO_NONBONDED") != NULL)
2452 /* turn off non-bonded calculations */
2453 fr->bNonbonded = FALSE;
2454 md_print_warn(cr, fp,
2455 "Found environment variable GMX_NO_NONBONDED.\n"
2456 "Disabling nonbonded calculations.\n");
2459 bGenericKernelOnly = FALSE;
2461 /* We now check in the NS code whether a particular combination of interactions
2462 * can be used with water optimization, and disable it if that is not the case.
2465 if (getenv("GMX_NB_GENERIC") != NULL)
2470 "Found environment variable GMX_NB_GENERIC.\n"
2471 "Disabling all interaction-specific nonbonded kernels, will only\n"
2472 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2474 bGenericKernelOnly = TRUE;
2477 if (bGenericKernelOnly == TRUE)
2482 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2484 fr->use_simd_kernels = FALSE;
2488 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2489 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2493 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2495 /* Check if we can/should do all-vs-all kernels */
2496 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2497 fr->AllvsAll_work = NULL;
2498 fr->AllvsAll_workgb = NULL;
2500 /* All-vs-all kernels have not been implemented in 4.6, and
2501 * the SIMD group kernels are also buggy in this case. Non-SIMD
2502 * group kernels are OK. See Redmine #1249. */
2505 fr->bAllvsAll = FALSE;
2506 fr->use_simd_kernels = FALSE;
2510 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2511 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2512 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2513 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2514 "we are proceeding by disabling all CPU architecture-specific\n"
2515 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2516 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2520 /* Neighbour searching stuff */
2521 fr->cutoff_scheme = ir->cutoff_scheme;
2522 fr->bGrid = (ir->ns_type == ensGRID);
2523 fr->ePBC = ir->ePBC;
2525 if (fr->cutoff_scheme == ecutsGROUP)
2527 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2528 "removed in a future release when 'verlet' supports all interaction forms.\n";
2532 fprintf(stderr, "\n%s\n", note);
2536 fprintf(fp, "\n%s\n", note);
2540 /* Determine if we will do PBC for distances in bonded interactions */
2541 if (fr->ePBC == epbcNONE)
2543 fr->bMolPBC = FALSE;
2547 if (!DOMAINDECOMP(cr))
2549 /* The group cut-off scheme and SHAKE assume charge groups
2550 * are whole, but not using molpbc is faster in most cases.
2552 if (fr->cutoff_scheme == ecutsGROUP ||
2553 (ir->eConstrAlg == econtSHAKE &&
2554 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2555 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2557 fr->bMolPBC = ir->bPeriodicMols;
2562 if (getenv("GMX_USE_GRAPH") != NULL)
2564 fr->bMolPBC = FALSE;
2567 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2574 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2577 fr->bGB = (ir->implicit_solvent == eisGBSA);
2579 fr->rc_scaling = ir->refcoord_scaling;
2580 copy_rvec(ir->posres_com, fr->posres_com);
2581 copy_rvec(ir->posres_comB, fr->posres_comB);
2582 fr->rlist = cutoff_inf(ir->rlist);
2583 fr->rlistlong = cutoff_inf(ir->rlistlong);
2584 fr->eeltype = ir->coulombtype;
2585 fr->vdwtype = ir->vdwtype;
2586 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2588 fr->coulomb_modifier = ir->coulomb_modifier;
2589 fr->vdw_modifier = ir->vdw_modifier;
2591 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2592 switch (fr->eeltype)
2595 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2601 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2605 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2606 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2615 case eelPMEUSERSWITCH:
2616 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2621 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2625 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2629 /* Vdw: Translate from mdp settings to kernel format */
2630 switch (fr->vdwtype)
2635 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2639 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2643 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2649 case evdwENCADSHIFT:
2650 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2654 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2658 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2659 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2660 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2662 fr->rvdw = cutoff_inf(ir->rvdw);
2663 fr->rvdw_switch = ir->rvdw_switch;
2664 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2665 fr->rcoulomb_switch = ir->rcoulomb_switch;
2667 fr->bTwinRange = fr->rlistlong > fr->rlist;
2668 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2670 fr->reppow = mtop->ffparams.reppow;
2672 if (ir->cutoff_scheme == ecutsGROUP)
2674 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2675 && !EVDW_PME(fr->vdwtype));
2676 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2677 fr->bcoultab = !(fr->eeltype == eelCUT ||
2678 fr->eeltype == eelEWALD ||
2679 fr->eeltype == eelPME ||
2680 fr->eeltype == eelRF ||
2681 fr->eeltype == eelRF_ZERO);
2683 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2684 * going to be faster to tabulate the interaction than calling the generic kernel.
2685 * However, if generic kernels have been requested we keep things analytically.
2687 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2688 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2689 bGenericKernelOnly == FALSE)
2691 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2693 fr->bcoultab = TRUE;
2694 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2695 * which would otherwise need two tables.
2699 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2700 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2701 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2702 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2704 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2706 fr->bcoultab = TRUE;
2710 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2712 fr->bcoultab = TRUE;
2714 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2719 if (getenv("GMX_REQUIRE_TABLES"))
2722 fr->bcoultab = TRUE;
2727 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2728 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2731 if (fr->bvdwtab == TRUE)
2733 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2734 fr->nbkernel_vdw_modifier = eintmodNONE;
2736 if (fr->bcoultab == TRUE)
2738 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2739 fr->nbkernel_elec_modifier = eintmodNONE;
2743 if (ir->cutoff_scheme == ecutsVERLET)
2745 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2747 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2749 fr->bvdwtab = FALSE;
2750 fr->bcoultab = FALSE;
2753 /* Tables are used for direct ewald sum */
2756 if (EEL_PME(ir->coulombtype))
2760 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2762 if (ir->coulombtype == eelP3M_AD)
2764 please_cite(fp, "Hockney1988");
2765 please_cite(fp, "Ballenegger2012");
2769 please_cite(fp, "Essmann95a");
2772 if (ir->ewald_geometry == eewg3DC)
2776 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2778 please_cite(fp, "In-Chul99a");
2781 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2782 init_ewald_tab(&(fr->ewald_table), ir, fp);
2785 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2786 1/fr->ewaldcoeff_q);
2790 if (EVDW_PME(ir->vdwtype))
2794 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2796 please_cite(fp, "Essmann95a");
2797 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2800 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2801 1/fr->ewaldcoeff_lj);
2805 /* Electrostatics */
2806 fr->epsilon_r = ir->epsilon_r;
2807 fr->epsilon_rf = ir->epsilon_rf;
2808 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2810 /* Parameters for generalized RF */
2814 if (fr->eeltype == eelGRF)
2816 init_generalized_rf(fp, mtop, ir, fr);
2819 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2820 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2821 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2822 IR_ELEC_FIELD(*ir) ||
2823 (fr->adress_icor != eAdressICOff)
2826 if (fr->cutoff_scheme == ecutsGROUP &&
2827 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2829 /* Count the total number of charge groups */
2830 fr->cg_nalloc = ncg_mtop(mtop);
2831 srenew(fr->cg_cm, fr->cg_nalloc);
2833 if (fr->shift_vec == NULL)
2835 snew(fr->shift_vec, SHIFTS);
2838 if (fr->fshift == NULL)
2840 snew(fr->fshift, SHIFTS);
2843 if (fr->nbfp == NULL)
2845 fr->ntype = mtop->ffparams.atnr;
2846 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2847 if (EVDW_PME(fr->vdwtype))
2849 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2853 /* Copy the energy group exclusions */
2854 fr->egp_flags = ir->opts.egp_flags;
2856 /* Van der Waals stuff */
2857 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2859 if (fr->rvdw_switch >= fr->rvdw)
2861 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2862 fr->rvdw_switch, fr->rvdw);
2866 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2867 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2868 fr->rvdw_switch, fr->rvdw);
2872 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2874 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2877 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2879 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2882 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2884 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2889 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2890 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2893 fr->eDispCorr = ir->eDispCorr;
2894 if (ir->eDispCorr != edispcNO)
2896 set_avcsixtwelve(fp, fr, mtop);
2901 set_bham_b_max(fp, fr, mtop);
2904 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2906 /* Copy the GBSA data (radius, volume and surftens for each
2907 * atomtype) from the topology atomtype section to forcerec.
2909 snew(fr->atype_radius, fr->ntype);
2910 snew(fr->atype_vol, fr->ntype);
2911 snew(fr->atype_surftens, fr->ntype);
2912 snew(fr->atype_gb_radius, fr->ntype);
2913 snew(fr->atype_S_hct, fr->ntype);
2915 if (mtop->atomtypes.nr > 0)
2917 for (i = 0; i < fr->ntype; i++)
2919 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2921 for (i = 0; i < fr->ntype; i++)
2923 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2925 for (i = 0; i < fr->ntype; i++)
2927 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2929 for (i = 0; i < fr->ntype; i++)
2931 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2933 for (i = 0; i < fr->ntype; i++)
2935 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2939 /* Generate the GB table if needed */
2943 fr->gbtabscale = 2000;
2945 fr->gbtabscale = 500;
2949 fr->gbtab = make_gb_table(oenv, fr);
2951 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2953 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2954 if (!DOMAINDECOMP(cr))
2956 make_local_gb(cr, fr->born, ir->gb_algorithm);
2960 /* Set the charge scaling */
2961 if (fr->epsilon_r != 0)
2963 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2967 /* eps = 0 is infinite dieletric: no coulomb interactions */
2971 /* Reaction field constants */
2972 if (EEL_RF(fr->eeltype))
2974 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2975 fr->rcoulomb, fr->temp, fr->zsquare, box,
2976 &fr->kappa, &fr->k_rf, &fr->c_rf);
2979 /*This now calculates sum for q and c6*/
2980 set_chargesum(fp, fr, mtop);
2982 /* if we are using LR electrostatics, and they are tabulated,
2983 * the tables will contain modified coulomb interactions.
2984 * Since we want to use the non-shifted ones for 1-4
2985 * coulombic interactions, we must have an extra set of tables.
2988 /* Construct tables.
2989 * A little unnecessary to make both vdw and coul tables sometimes,
2990 * but what the heck... */
2992 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2993 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2995 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2996 fr->coulomb_modifier != eintmodNONE ||
2997 fr->vdw_modifier != eintmodNONE ||
2998 fr->bBHAM || fr->bEwald) &&
2999 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3000 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3001 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3003 negp_pp = ir->opts.ngener - ir->nwall;
3007 bSomeNormalNbListsAreInUse = TRUE;
3012 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3013 for (egi = 0; egi < negp_pp; egi++)
3015 for (egj = egi; egj < negp_pp; egj++)
3017 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3018 if (!(egp_flags & EGP_EXCL))
3020 if (egp_flags & EGP_TABLE)
3026 bSomeNormalNbListsAreInUse = TRUE;
3031 if (bSomeNormalNbListsAreInUse)
3033 fr->nnblists = negptable + 1;
3037 fr->nnblists = negptable;
3039 if (fr->nnblists > 1)
3041 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3050 snew(fr->nblists, fr->nnblists);
3052 /* This code automatically gives table length tabext without cut-off's,
3053 * in that case grompp should already have checked that we do not need
3054 * normal tables and we only generate tables for 1-4 interactions.
3056 rtab = ir->rlistlong + ir->tabext;
3060 /* make tables for ordinary interactions */
3061 if (bSomeNormalNbListsAreInUse)
3063 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3066 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3068 if (!bMakeSeparate14Table)
3070 fr->tab14 = fr->nblists[0].table_elec_vdw;
3080 /* Read the special tables for certain energy group pairs */
3081 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3082 for (egi = 0; egi < negp_pp; egi++)
3084 for (egj = egi; egj < negp_pp; egj++)
3086 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3087 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3089 nbl = &(fr->nblists[m]);
3090 if (fr->nnblists > 1)
3092 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3094 /* Read the table file with the two energy groups names appended */
3095 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3096 *mtop->groups.grpname[nm_ind[egi]],
3097 *mtop->groups.grpname[nm_ind[egj]],
3101 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3102 *mtop->groups.grpname[nm_ind[egi]],
3103 *mtop->groups.grpname[nm_ind[egj]],
3104 &fr->nblists[fr->nnblists/2+m]);
3108 else if (fr->nnblists > 1)
3110 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3116 else if ((fr->eDispCorr != edispcNO) &&
3117 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3118 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3119 (fr->vdw_modifier == eintmodPOTSHIFT)))
3121 /* Tables might not be used for the potential modifier interactions per se, but
3122 * we still need them to evaluate switch/shift dispersion corrections in this case.
3124 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3127 if (bMakeSeparate14Table)
3129 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3130 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3131 GMX_MAKETABLES_14ONLY);
3134 /* Read AdResS Thermo Force table if needed */
3135 if (fr->adress_icor == eAdressICThermoForce)
3137 /* old todo replace */
3139 if (ir->adress->n_tf_grps > 0)
3141 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3146 /* load the default table */
3147 snew(fr->atf_tabs, 1);
3148 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3153 fr->nwall = ir->nwall;
3154 if (ir->nwall && ir->wall_type == ewtTABLE)
3156 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3161 fcd->bondtab = make_bonded_tables(fp,
3162 F_TABBONDS, F_TABBONDSNC,
3164 fcd->angletab = make_bonded_tables(fp,
3167 fcd->dihtab = make_bonded_tables(fp,
3175 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3179 /* QM/MM initialization if requested
3183 fprintf(stderr, "QM/MM calculation requested.\n");
3186 fr->bQMMM = ir->bQMMM;
3187 fr->qr = mk_QMMMrec();
3189 /* Set all the static charge group info */
3190 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3192 &fr->bExcl_IntraCGAll_InterCGNone);
3193 if (DOMAINDECOMP(cr))
3199 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3202 if (!DOMAINDECOMP(cr))
3204 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3205 mtop->natoms, mtop->natoms, mtop->natoms);
3208 fr->print_force = print_force;
3211 /* coarse load balancing vars */
3216 /* Initialize neighbor search */
3217 init_ns(fp, cr, &fr->ns, fr, mtop);
3219 if (cr->duty & DUTY_PP)
3221 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3225 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3230 /* Initialize the thread working data for bonded interactions */
3231 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3233 snew(fr->excl_load, fr->nthreads+1);
3235 if (fr->cutoff_scheme == ecutsVERLET)
3237 if (ir->rcoulomb != ir->rvdw)
3239 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3242 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3245 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3246 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3248 if (ir->eDispCorr != edispcNO)
3250 calc_enervirdiff(fp, ir->eDispCorr, fr);
3254 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3255 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3256 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3258 void pr_forcerec(FILE *fp, t_forcerec *fr)
3262 pr_real(fp, fr->rlist);
3263 pr_real(fp, fr->rcoulomb);
3264 pr_real(fp, fr->fudgeQQ);
3265 pr_bool(fp, fr->bGrid);
3266 pr_bool(fp, fr->bTwinRange);
3267 /*pr_int(fp,fr->cg0);
3268 pr_int(fp,fr->hcg);*/
3269 for (i = 0; i < fr->nnblists; i++)
3271 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3273 pr_real(fp, fr->rcoulomb_switch);
3274 pr_real(fp, fr->rcoulomb);
3279 void forcerec_set_excl_load(t_forcerec *fr,
3280 const gmx_localtop_t *top)
3283 int t, i, j, ntot, n, ntarget;
3285 ind = top->excls.index;
3289 for (i = 0; i < top->excls.nr; i++)
3291 for (j = ind[i]; j < ind[i+1]; j++)
3300 fr->excl_load[0] = 0;
3303 for (t = 1; t <= fr->nthreads; t++)
3305 ntarget = (ntot*t)/fr->nthreads;
3306 while (i < top->excls.nr && n < ntarget)
3308 for (j = ind[i]; j < ind[i+1]; j++)
3317 fr->excl_load[t] = i;
3321 /* Frees GPU memory and destroys the CUDA context.
3323 * Note that this function needs to be called even if GPUs are not used
3324 * in this run because the PME ranks have no knowledge of whether GPUs
3325 * are used or not, but all ranks need to enter the barrier below.
3327 void free_gpu_resources(const t_forcerec *fr,
3328 const t_commrec *cr)
3330 gmx_bool bIsPPrankUsingGPU;
3331 char gpu_err_str[STRLEN];
3333 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3335 if (bIsPPrankUsingGPU)
3337 /* free nbnxn data in GPU memory */
3338 nbnxn_cuda_free(fr->nbv->cu_nbv);
3340 /* With tMPI we need to wait for all ranks to finish deallocation before
3341 * destroying the context in free_gpu() as some ranks may be sharing
3343 * Note: as only PP ranks need to free GPU resources, so it is safe to
3344 * not call the barrier on PME ranks.
3346 #ifdef GMX_THREAD_MPI
3351 #endif /* GMX_THREAD_MPI */
3353 /* uninitialize GPU (by destroying the context) */
3354 if (!free_gpu(gpu_err_str))
3356 gmx_warning("On rank %d failed to free GPU #%d: %s",
3357 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);