2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "types/commrec.h"
48 #include "gromacs/math/utilities.h"
52 #include "gmx_fatal.h"
53 #include "gmx_fatal_collective.h"
57 #include "nonbonded.h"
66 #include "md_support.h"
67 #include "md_logging.h"
71 #include "mtop_util.h"
72 #include "nbnxn_simd.h"
73 #include "nbnxn_search.h"
74 #include "nbnxn_atomdata.h"
75 #include "nbnxn_consts.h"
76 #include "gmx_omp_nthreads.h"
77 #include "gmx_detect_hardware.h"
81 /* MSVC definition for __cpuid() */
85 #include "types/nbnxn_cuda_types_ext.h"
86 #include "gpu_utils.h"
87 #include "nbnxn_cuda_data_mgmt.h"
88 #include "pmalloc_cuda.h"
90 t_forcerec *mk_forcerec(void)
100 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
104 for (i = 0; (i < atnr); i++)
106 for (j = 0; (j < atnr); j++)
108 fprintf(fp, "%2d - %2d", i, j);
111 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
112 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
116 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
117 C12(nbfp, atnr, i, j)/12.0);
124 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
132 snew(nbfp, 3*atnr*atnr);
133 for (i = k = 0; (i < atnr); i++)
135 for (j = 0; (j < atnr); j++, k++)
137 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
138 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
139 /* nbfp now includes the 6.0 derivative prefactor */
140 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
146 snew(nbfp, 2*atnr*atnr);
147 for (i = k = 0; (i < atnr); i++)
149 for (j = 0; (j < atnr); j++, k++)
151 /* nbfp now includes the 6.0/12.0 derivative prefactors */
152 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
153 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
161 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
165 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
169 snew(nbfp, 2*atnr*atnr);
170 for (i = 0; i < atnr; ++i)
172 for (j = 0; j < atnr; ++j)
174 c6i = idef->iparams[i*(atnr+1)].lj.c6;
175 c12i = idef->iparams[i*(atnr+1)].lj.c12;
176 c6j = idef->iparams[j*(atnr+1)].lj.c6;
177 c12j = idef->iparams[j*(atnr+1)].lj.c12;
178 c6 = sqrt(c6i * c6j);
179 c12 = sqrt(c12i * c12j);
180 if (comb_rule == eCOMB_ARITHMETIC
181 && !gmx_numzero(c6) && !gmx_numzero(c12))
183 sigmai = pow(c12i / c6i, 1.0/6.0);
184 sigmaj = pow(c12j / c6j, 1.0/6.0);
185 epsi = c6i * c6i / c12i;
186 epsj = c6j * c6j / c12j;
187 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
188 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
190 C6(nbfp, atnr, i, j) = c6*6.0;
191 C12(nbfp, atnr, i, j) = c12*12.0;
197 /* This routine sets fr->solvent_opt to the most common solvent in the
198 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
199 * the fr->solvent_type array with the correct type (or esolNO).
201 * Charge groups that fulfill the conditions but are not identical to the
202 * most common one will be marked as esolNO in the solvent_type array.
204 * TIP3p is identical to SPC for these purposes, so we call it
205 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
207 * NOTE: QM particle should not
208 * become an optimized solvent. Not even if there is only one charge
218 } solvent_parameters_t;
221 check_solvent_cg(const gmx_moltype_t *molt,
224 const unsigned char *qm_grpnr,
225 const t_grps *qm_grps,
227 int *n_solvent_parameters,
228 solvent_parameters_t **solvent_parameters_p,
232 const t_blocka *excl;
239 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
240 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
243 solvent_parameters_t *solvent_parameters;
245 /* We use a list with parameters for each solvent type.
246 * Every time we discover a new molecule that fulfills the basic
247 * conditions for a solvent we compare with the previous entries
248 * in these lists. If the parameters are the same we just increment
249 * the counter for that type, and otherwise we create a new type
250 * based on the current molecule.
252 * Once we've finished going through all molecules we check which
253 * solvent is most common, and mark all those molecules while we
254 * clear the flag on all others.
257 solvent_parameters = *solvent_parameters_p;
259 /* Mark the cg first as non optimized */
262 /* Check if this cg has no exclusions with atoms in other charge groups
263 * and all atoms inside the charge group excluded.
264 * We only have 3 or 4 atom solvent loops.
266 if (GET_CGINFO_EXCL_INTER(cginfo) ||
267 !GET_CGINFO_EXCL_INTRA(cginfo))
272 /* Get the indices of the first atom in this charge group */
273 j0 = molt->cgs.index[cg0];
274 j1 = molt->cgs.index[cg0+1];
276 /* Number of atoms in our molecule */
282 "Moltype '%s': there are %d atoms in this charge group\n",
286 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
289 if (nj < 3 || nj > 4)
294 /* Check if we are doing QM on this group */
296 if (qm_grpnr != NULL)
298 for (j = j0; j < j1 && !qm; j++)
300 qm = (qm_grpnr[j] < qm_grps->nr - 1);
303 /* Cannot use solvent optimization with QM */
309 atom = molt->atoms.atom;
311 /* Still looks like a solvent, time to check parameters */
313 /* If it is perturbed (free energy) we can't use the solvent loops,
314 * so then we just skip to the next molecule.
318 for (j = j0; j < j1 && !perturbed; j++)
320 perturbed = PERTURBED(atom[j]);
328 /* Now it's only a question if the VdW and charge parameters
329 * are OK. Before doing the check we compare and see if they are
330 * identical to a possible previous solvent type.
331 * First we assign the current types and charges.
333 for (j = 0; j < nj; j++)
335 tmp_vdwtype[j] = atom[j0+j].type;
336 tmp_charge[j] = atom[j0+j].q;
339 /* Does it match any previous solvent type? */
340 for (k = 0; k < *n_solvent_parameters; k++)
345 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
346 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
347 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
352 /* Check that types & charges match for all atoms in molecule */
353 for (j = 0; j < nj && match == TRUE; j++)
355 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
359 if (tmp_charge[j] != solvent_parameters[k].charge[j])
366 /* Congratulations! We have a matched solvent.
367 * Flag it with this type for later processing.
370 solvent_parameters[k].count += nmol;
372 /* We are done with this charge group */
377 /* If we get here, we have a tentative new solvent type.
378 * Before we add it we must check that it fulfills the requirements
379 * of the solvent optimized loops. First determine which atoms have
382 for (j = 0; j < nj; j++)
385 tjA = tmp_vdwtype[j];
387 /* Go through all other tpes and see if any have non-zero
388 * VdW parameters when combined with this one.
390 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
392 /* We already checked that the atoms weren't perturbed,
393 * so we only need to check state A now.
397 has_vdw[j] = (has_vdw[j] ||
398 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
399 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
400 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
405 has_vdw[j] = (has_vdw[j] ||
406 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
407 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
412 /* Now we know all we need to make the final check and assignment. */
416 * For this we require thatn all atoms have charge,
417 * the charges on atom 2 & 3 should be the same, and only
418 * atom 1 might have VdW.
420 if (has_vdw[1] == FALSE &&
421 has_vdw[2] == FALSE &&
422 tmp_charge[0] != 0 &&
423 tmp_charge[1] != 0 &&
424 tmp_charge[2] == tmp_charge[1])
426 srenew(solvent_parameters, *n_solvent_parameters+1);
427 solvent_parameters[*n_solvent_parameters].model = esolSPC;
428 solvent_parameters[*n_solvent_parameters].count = nmol;
429 for (k = 0; k < 3; k++)
431 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
432 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
435 *cg_sp = *n_solvent_parameters;
436 (*n_solvent_parameters)++;
441 /* Or could it be a TIP4P?
442 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
443 * Only atom 1 mght have VdW.
445 if (has_vdw[1] == FALSE &&
446 has_vdw[2] == FALSE &&
447 has_vdw[3] == FALSE &&
448 tmp_charge[0] == 0 &&
449 tmp_charge[1] != 0 &&
450 tmp_charge[2] == tmp_charge[1] &&
453 srenew(solvent_parameters, *n_solvent_parameters+1);
454 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
455 solvent_parameters[*n_solvent_parameters].count = nmol;
456 for (k = 0; k < 4; k++)
458 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
459 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
462 *cg_sp = *n_solvent_parameters;
463 (*n_solvent_parameters)++;
467 *solvent_parameters_p = solvent_parameters;
471 check_solvent(FILE * fp,
472 const gmx_mtop_t * mtop,
474 cginfo_mb_t *cginfo_mb)
477 const t_block * mols;
478 const gmx_moltype_t *molt;
479 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
480 int n_solvent_parameters;
481 solvent_parameters_t *solvent_parameters;
487 fprintf(debug, "Going to determine what solvent types we have.\n");
492 n_solvent_parameters = 0;
493 solvent_parameters = NULL;
494 /* Allocate temporary array for solvent type */
495 snew(cg_sp, mtop->nmolblock);
499 for (mb = 0; mb < mtop->nmolblock; mb++)
501 molt = &mtop->moltype[mtop->molblock[mb].type];
503 /* Here we have to loop over all individual molecules
504 * because we need to check for QMMM particles.
506 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
507 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
508 nmol = mtop->molblock[mb].nmol/nmol_ch;
509 for (mol = 0; mol < nmol_ch; mol++)
512 am = mol*cgs->index[cgs->nr];
513 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
515 check_solvent_cg(molt, cg_mol, nmol,
516 mtop->groups.grpnr[egcQMMM] ?
517 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
518 &mtop->groups.grps[egcQMMM],
520 &n_solvent_parameters, &solvent_parameters,
521 cginfo_mb[mb].cginfo[cgm+cg_mol],
522 &cg_sp[mb][cgm+cg_mol]);
525 cg_offset += cgs->nr;
526 at_offset += cgs->index[cgs->nr];
529 /* Puh! We finished going through all charge groups.
530 * Now find the most common solvent model.
533 /* Most common solvent this far */
535 for (i = 0; i < n_solvent_parameters; i++)
538 solvent_parameters[i].count > solvent_parameters[bestsp].count)
546 bestsol = solvent_parameters[bestsp].model;
553 #ifdef DISABLE_WATER_NLIST
558 for (mb = 0; mb < mtop->nmolblock; mb++)
560 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
561 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
562 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
564 if (cg_sp[mb][i] == bestsp)
566 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
571 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
578 if (bestsol != esolNO && fp != NULL)
580 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
582 solvent_parameters[bestsp].count);
585 sfree(solvent_parameters);
586 fr->solvent_opt = bestsol;
590 acNONE = 0, acCONSTRAINT, acSETTLE
593 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
594 t_forcerec *fr, gmx_bool bNoSolvOpt,
595 gmx_bool *bFEP_NonBonded,
596 gmx_bool *bExcl_IntraCGAll_InterCGNone)
599 const t_blocka *excl;
600 const gmx_moltype_t *molt;
601 const gmx_molblock_t *molb;
602 cginfo_mb_t *cginfo_mb;
605 int cg_offset, a_offset, cgm, am;
606 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
610 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
612 ncg_tot = ncg_mtop(mtop);
613 snew(cginfo_mb, mtop->nmolblock);
615 snew(type_VDW, fr->ntype);
616 for (ai = 0; ai < fr->ntype; ai++)
618 type_VDW[ai] = FALSE;
619 for (j = 0; j < fr->ntype; j++)
621 type_VDW[ai] = type_VDW[ai] ||
623 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
624 C12(fr->nbfp, fr->ntype, ai, j) != 0;
628 *bFEP_NonBonded = FALSE;
629 *bExcl_IntraCGAll_InterCGNone = TRUE;
632 snew(bExcl, excl_nalloc);
635 for (mb = 0; mb < mtop->nmolblock; mb++)
637 molb = &mtop->molblock[mb];
638 molt = &mtop->moltype[molb->type];
642 /* Check if the cginfo is identical for all molecules in this block.
643 * If so, we only need an array of the size of one molecule.
644 * Otherwise we make an array of #mol times #cgs per molecule.
648 for (m = 0; m < molb->nmol; m++)
650 am = m*cgs->index[cgs->nr];
651 for (cg = 0; cg < cgs->nr; cg++)
654 a1 = cgs->index[cg+1];
655 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
656 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
660 if (mtop->groups.grpnr[egcQMMM] != NULL)
662 for (ai = a0; ai < a1; ai++)
664 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
665 mtop->groups.grpnr[egcQMMM][a_offset +ai])
674 cginfo_mb[mb].cg_start = cg_offset;
675 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
676 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
677 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
678 cginfo = cginfo_mb[mb].cginfo;
680 /* Set constraints flags for constrained atoms */
681 snew(a_con, molt->atoms.nr);
682 for (ftype = 0; ftype < F_NRE; ftype++)
684 if (interaction_function[ftype].flags & IF_CONSTRAINT)
689 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
693 for (a = 0; a < nral; a++)
695 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
696 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
702 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
705 am = m*cgs->index[cgs->nr];
706 for (cg = 0; cg < cgs->nr; cg++)
709 a1 = cgs->index[cg+1];
711 /* Store the energy group in cginfo */
712 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
713 SET_CGINFO_GID(cginfo[cgm+cg], gid);
715 /* Check the intra/inter charge group exclusions */
716 if (a1-a0 > excl_nalloc)
718 excl_nalloc = a1 - a0;
719 srenew(bExcl, excl_nalloc);
721 /* bExclIntraAll: all intra cg interactions excluded
722 * bExclInter: any inter cg interactions excluded
724 bExclIntraAll = TRUE;
728 bHavePerturbedAtoms = FALSE;
729 for (ai = a0; ai < a1; ai++)
731 /* Check VDW and electrostatic interactions */
732 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
733 type_VDW[molt->atoms.atom[ai].typeB]);
734 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
735 molt->atoms.atom[ai].qB != 0);
737 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
739 /* Clear the exclusion list for atom ai */
740 for (aj = a0; aj < a1; aj++)
742 bExcl[aj-a0] = FALSE;
744 /* Loop over all the exclusions of atom ai */
745 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
748 if (aj < a0 || aj >= a1)
757 /* Check if ai excludes a0 to a1 */
758 for (aj = a0; aj < a1; aj++)
762 bExclIntraAll = FALSE;
769 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
772 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
780 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
784 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
786 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
788 /* The size in cginfo is currently only read with DD */
789 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
793 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
797 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
799 if (bHavePerturbedAtoms && fr->efep != efepNO)
801 SET_CGINFO_FEP(cginfo[cgm+cg]);
802 *bFEP_NonBonded = TRUE;
804 /* Store the charge group size */
805 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
807 if (!bExclIntraAll || bExclInter)
809 *bExcl_IntraCGAll_InterCGNone = FALSE;
816 cg_offset += molb->nmol*cgs->nr;
817 a_offset += molb->nmol*cgs->index[cgs->nr];
821 /* the solvent optimizer is called after the QM is initialized,
822 * because we don't want to have the QM subsystemto become an
826 check_solvent(fplog, mtop, fr, cginfo_mb);
828 if (getenv("GMX_NO_SOLV_OPT"))
832 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
833 "Disabling all solvent optimization\n");
835 fr->solvent_opt = esolNO;
839 fr->solvent_opt = esolNO;
841 if (!fr->solvent_opt)
843 for (mb = 0; mb < mtop->nmolblock; mb++)
845 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
847 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
855 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
860 ncg = cgi_mb[nmb-1].cg_end;
863 for (cg = 0; cg < ncg; cg++)
865 while (cg >= cgi_mb[mb].cg_end)
870 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
876 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
878 /*This now calculates sum for q and c6*/
879 double qsum, q2sum, q, c6sum, c6;
881 const t_atoms *atoms;
886 for (mb = 0; mb < mtop->nmolblock; mb++)
888 nmol = mtop->molblock[mb].nmol;
889 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
890 for (i = 0; i < atoms->nr; i++)
892 q = atoms->atom[i].q;
895 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
900 fr->q2sum[0] = q2sum;
901 fr->c6sum[0] = c6sum;
903 if (fr->efep != efepNO)
908 for (mb = 0; mb < mtop->nmolblock; mb++)
910 nmol = mtop->molblock[mb].nmol;
911 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
912 for (i = 0; i < atoms->nr; i++)
914 q = atoms->atom[i].qB;
917 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
921 fr->q2sum[1] = q2sum;
922 fr->c6sum[1] = c6sum;
927 fr->qsum[1] = fr->qsum[0];
928 fr->q2sum[1] = fr->q2sum[0];
929 fr->c6sum[1] = fr->c6sum[0];
933 if (fr->efep == efepNO)
935 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
939 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
940 fr->qsum[0], fr->qsum[1]);
945 void update_forcerec(t_forcerec *fr, matrix box)
947 if (fr->eeltype == eelGRF)
949 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
950 fr->rcoulomb, fr->temp, fr->zsquare, box,
951 &fr->kappa, &fr->k_rf, &fr->c_rf);
955 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
957 const t_atoms *atoms, *atoms_tpi;
958 const t_blocka *excl;
959 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
960 gmx_int64_t npair, npair_ij, tmpi, tmpj;
961 double csix, ctwelve;
965 real *nbfp_comb = NULL;
971 /* For LJ-PME, we want to correct for the difference between the
972 * actual C6 values and the C6 values used by the LJ-PME based on
973 * combination rules. */
975 if (EVDW_PME(fr->vdwtype))
977 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
978 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
979 for (tpi = 0; tpi < ntp; ++tpi)
981 for (tpj = 0; tpj < ntp; ++tpj)
983 C6(nbfp_comb, ntp, tpi, tpj) =
984 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
985 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
990 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
998 /* Count the types so we avoid natoms^2 operations */
999 snew(typecount, ntp);
1000 gmx_mtop_count_atomtypes(mtop, q, typecount);
1002 for (tpi = 0; tpi < ntp; tpi++)
1004 for (tpj = tpi; tpj < ntp; tpj++)
1006 tmpi = typecount[tpi];
1007 tmpj = typecount[tpj];
1010 npair_ij = tmpi*tmpj;
1014 npair_ij = tmpi*(tmpi - 1)/2;
1018 /* nbfp now includes the 6.0 derivative prefactor */
1019 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1023 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1024 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1025 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1031 /* Subtract the excluded pairs.
1032 * The main reason for substracting exclusions is that in some cases
1033 * some combinations might never occur and the parameters could have
1034 * any value. These unused values should not influence the dispersion
1037 for (mb = 0; mb < mtop->nmolblock; mb++)
1039 nmol = mtop->molblock[mb].nmol;
1040 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1041 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1042 for (i = 0; (i < atoms->nr); i++)
1046 tpi = atoms->atom[i].type;
1050 tpi = atoms->atom[i].typeB;
1052 j1 = excl->index[i];
1053 j2 = excl->index[i+1];
1054 for (j = j1; j < j2; j++)
1061 tpj = atoms->atom[k].type;
1065 tpj = atoms->atom[k].typeB;
1069 /* nbfp now includes the 6.0 derivative prefactor */
1070 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1074 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1075 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1076 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1086 /* Only correct for the interaction of the test particle
1087 * with the rest of the system.
1090 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1093 for (mb = 0; mb < mtop->nmolblock; mb++)
1095 nmol = mtop->molblock[mb].nmol;
1096 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1097 for (j = 0; j < atoms->nr; j++)
1100 /* Remove the interaction of the test charge group
1103 if (mb == mtop->nmolblock-1)
1107 if (mb == 0 && nmol == 1)
1109 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1114 tpj = atoms->atom[j].type;
1118 tpj = atoms->atom[j].typeB;
1120 for (i = 0; i < fr->n_tpi; i++)
1124 tpi = atoms_tpi->atom[i].type;
1128 tpi = atoms_tpi->atom[i].typeB;
1132 /* nbfp now includes the 6.0 derivative prefactor */
1133 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1137 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1138 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1139 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1146 if (npair - nexcl <= 0 && fplog)
1148 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1154 csix /= npair - nexcl;
1155 ctwelve /= npair - nexcl;
1159 fprintf(debug, "Counted %d exclusions\n", nexcl);
1160 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1161 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1163 fr->avcsix[q] = csix;
1164 fr->avctwelve[q] = ctwelve;
1167 if (EVDW_PME(fr->vdwtype))
1174 if (fr->eDispCorr == edispcAllEner ||
1175 fr->eDispCorr == edispcAllEnerPres)
1177 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1178 fr->avcsix[0], fr->avctwelve[0]);
1182 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1188 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1189 const gmx_mtop_t *mtop)
1191 const t_atoms *at1, *at2;
1192 int mt1, mt2, i, j, tpi, tpj, ntypes;
1198 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1205 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1207 at1 = &mtop->moltype[mt1].atoms;
1208 for (i = 0; (i < at1->nr); i++)
1210 tpi = at1->atom[i].type;
1213 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1216 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1218 at2 = &mtop->moltype[mt2].atoms;
1219 for (j = 0; (j < at2->nr); j++)
1221 tpj = at2->atom[j].type;
1224 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1226 b = BHAMB(nbfp, ntypes, tpi, tpj);
1227 if (b > fr->bham_b_max)
1231 if ((b < bmin) || (bmin == -1))
1241 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1242 bmin, fr->bham_b_max);
1246 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1247 t_forcerec *fr, real rtab,
1248 const t_commrec *cr,
1249 const char *tabfn, char *eg1, char *eg2,
1259 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1264 sprintf(buf, "%s", tabfn);
1267 /* Append the two energy group names */
1268 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1269 eg1, eg2, ftp2ext(efXVG));
1271 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1272 /* Copy the contents of the table to separate coulomb and LJ tables too,
1273 * to improve cache performance.
1275 /* For performance reasons we want
1276 * the table data to be aligned to 16-byte. The pointers could be freed
1277 * but currently aren't.
1279 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1280 nbl->table_elec.format = nbl->table_elec_vdw.format;
1281 nbl->table_elec.r = nbl->table_elec_vdw.r;
1282 nbl->table_elec.n = nbl->table_elec_vdw.n;
1283 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1284 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1285 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1286 nbl->table_elec.ninteractions = 1;
1287 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1288 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1290 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1291 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1292 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1293 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1294 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1295 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1296 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1297 nbl->table_vdw.ninteractions = 2;
1298 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1299 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1301 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1303 for (j = 0; j < 4; j++)
1305 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1307 for (j = 0; j < 8; j++)
1309 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1314 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1315 int *ncount, int **count)
1317 const gmx_moltype_t *molt;
1319 int mt, ftype, stride, i, j, tabnr;
1321 for (mt = 0; mt < mtop->nmoltype; mt++)
1323 molt = &mtop->moltype[mt];
1324 for (ftype = 0; ftype < F_NRE; ftype++)
1326 if (ftype == ftype1 || ftype == ftype2)
1328 il = &molt->ilist[ftype];
1329 stride = 1 + NRAL(ftype);
1330 for (i = 0; i < il->nr; i += stride)
1332 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1335 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1337 if (tabnr >= *ncount)
1339 srenew(*count, tabnr+1);
1340 for (j = *ncount; j < tabnr+1; j++)
1353 static bondedtable_t *make_bonded_tables(FILE *fplog,
1354 int ftype1, int ftype2,
1355 const gmx_mtop_t *mtop,
1356 const char *basefn, const char *tabext)
1358 int i, ncount, *count;
1366 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1371 for (i = 0; i < ncount; i++)
1375 sprintf(tabfn, "%s", basefn);
1376 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1377 tabext, i, ftp2ext(efXVG));
1378 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1387 void forcerec_set_ranges(t_forcerec *fr,
1388 int ncg_home, int ncg_force,
1390 int natoms_force_constr, int natoms_f_novirsum)
1395 /* fr->ncg_force is unused in the standard code,
1396 * but it can be useful for modified code dealing with charge groups.
1398 fr->ncg_force = ncg_force;
1399 fr->natoms_force = natoms_force;
1400 fr->natoms_force_constr = natoms_force_constr;
1402 if (fr->natoms_force_constr > fr->nalloc_force)
1404 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1408 srenew(fr->f_twin, fr->nalloc_force);
1412 if (fr->bF_NoVirSum)
1414 fr->f_novirsum_n = natoms_f_novirsum;
1415 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1417 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1418 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1423 fr->f_novirsum_n = 0;
1427 static real cutoff_inf(real cutoff)
1431 cutoff = GMX_CUTOFF_INF;
1437 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1438 t_forcerec *fr, const t_inputrec *ir,
1439 const char *tabfn, const gmx_mtop_t *mtop,
1447 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1451 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1453 sprintf(buf, "%s", tabfn);
1454 for (i = 0; i < ir->adress->n_tf_grps; i++)
1456 j = ir->adress->tf_table_index[i]; /* get energy group index */
1457 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1458 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1461 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1463 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1468 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1475 ir->rcoulomb == 0 &&
1477 ir->ePBC == epbcNONE &&
1478 ir->vdwtype == evdwCUT &&
1479 ir->coulombtype == eelCUT &&
1480 ir->efep == efepNO &&
1481 (ir->implicit_solvent == eisNO ||
1482 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1483 ir->gb_algorithm == egbHCT ||
1484 ir->gb_algorithm == egbOBC))) &&
1485 getenv("GMX_NO_ALLVSALL") == NULL
1488 if (bAllvsAll && ir->opts.ngener > 1)
1490 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1496 fprintf(stderr, "\n%s\n", note);
1500 fprintf(fp, "\n%s\n", note);
1506 if (bAllvsAll && fp && MASTER(cr))
1508 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1515 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1519 /* These thread local data structures are used for bondeds only */
1520 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1522 if (fr->nthreads > 1)
1524 snew(fr->f_t, fr->nthreads);
1525 /* Thread 0 uses the global force and energy arrays */
1526 for (t = 1; t < fr->nthreads; t++)
1528 fr->f_t[t].f = NULL;
1529 fr->f_t[t].f_nalloc = 0;
1530 snew(fr->f_t[t].fshift, SHIFTS);
1531 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1532 for (i = 0; i < egNR; i++)
1534 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1541 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1542 const t_commrec *cr,
1543 const t_inputrec *ir,
1546 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1548 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1549 bGPU ? "GPUs" : "SIMD kernels",
1550 bGPU ? "CPU only" : "plain-C kernels");
1558 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1562 *kernel_type = nbnxnk4x4_PlainC;
1563 *ewald_excl = ewaldexclTable;
1565 #ifdef GMX_NBNXN_SIMD
1567 #ifdef GMX_NBNXN_SIMD_4XN
1568 *kernel_type = nbnxnk4xN_SIMD_4xN;
1570 #ifdef GMX_NBNXN_SIMD_2XNN
1571 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1574 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1575 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1576 * Currently this is based on the SIMD acceleration choice,
1577 * but it might be better to decide this at runtime based on CPU.
1579 * 4xN calculates more (zero) interactions, but has less pair-search
1580 * work and much better kernel instruction scheduling.
1582 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1583 * which doesn't have FMA, both the analytical and tabulated Ewald
1584 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1585 * 2x(4+4) because it results in significantly fewer pairs.
1586 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1587 * 10% with HT, 50% without HT. As we currently don't detect the actual
1588 * use of HT, use 4x8 to avoid a potential performance hit.
1589 * On Intel Haswell 4x8 is always faster.
1591 *kernel_type = nbnxnk4xN_SIMD_4xN;
1593 #ifndef GMX_SIMD_HAVE_FMA
1594 if (EEL_PME_EWALD(ir->coulombtype) ||
1595 EVDW_PME(ir->vdwtype))
1597 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1598 * There are enough instructions to make 2x(4+4) efficient.
1600 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1603 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1606 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1608 #ifdef GMX_NBNXN_SIMD_4XN
1609 *kernel_type = nbnxnk4xN_SIMD_4xN;
1611 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1614 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1616 #ifdef GMX_NBNXN_SIMD_2XNN
1617 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1619 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1623 /* Analytical Ewald exclusion correction is only an option in
1625 * Since table lookup's don't parallelize with SIMD, analytical
1626 * will probably always be faster for a SIMD width of 8 or more.
1627 * With FMA analytical is sometimes faster for a width if 4 as well.
1628 * On BlueGene/Q, this is faster regardless of precision.
1629 * In single precision, this is faster on Bulldozer.
1631 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1632 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1633 defined GMX_SIMD_IBM_QPX
1634 *ewald_excl = ewaldexclAnalytical;
1636 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1638 *ewald_excl = ewaldexclTable;
1640 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1642 *ewald_excl = ewaldexclAnalytical;
1646 #endif /* GMX_NBNXN_SIMD */
1650 const char *lookup_nbnxn_kernel_name(int kernel_type)
1652 const char *returnvalue = NULL;
1653 switch (kernel_type)
1656 returnvalue = "not set";
1658 case nbnxnk4x4_PlainC:
1659 returnvalue = "plain C";
1661 case nbnxnk4xN_SIMD_4xN:
1662 case nbnxnk4xN_SIMD_2xNN:
1663 #ifdef GMX_NBNXN_SIMD
1664 #if defined GMX_SIMD_X86_SSE2
1665 returnvalue = "SSE2";
1666 #elif defined GMX_SIMD_X86_SSE4_1
1667 returnvalue = "SSE4.1";
1668 #elif defined GMX_SIMD_X86_AVX_128_FMA
1669 returnvalue = "AVX_128_FMA";
1670 #elif defined GMX_SIMD_X86_AVX_256
1671 returnvalue = "AVX_256";
1672 #elif defined GMX_SIMD_X86_AVX2_256
1673 returnvalue = "AVX2_256";
1675 returnvalue = "SIMD";
1677 #else /* GMX_NBNXN_SIMD */
1678 returnvalue = "not available";
1679 #endif /* GMX_NBNXN_SIMD */
1681 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1682 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1686 gmx_fatal(FARGS, "Illegal kernel type selected");
1693 static void pick_nbnxn_kernel(FILE *fp,
1694 const t_commrec *cr,
1695 gmx_bool use_simd_kernels,
1697 gmx_bool bEmulateGPU,
1698 const t_inputrec *ir,
1701 gmx_bool bDoNonbonded)
1703 assert(kernel_type);
1705 *kernel_type = nbnxnkNotSet;
1706 *ewald_excl = ewaldexclTable;
1710 *kernel_type = nbnxnk8x8x8_PlainC;
1714 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1719 *kernel_type = nbnxnk8x8x8_CUDA;
1722 if (*kernel_type == nbnxnkNotSet)
1724 /* LJ PME with LB combination rule does 7 mesh operations.
1725 * This so slow that we don't compile SIMD non-bonded kernels for that.
1727 if (use_simd_kernels &&
1728 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1730 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1734 *kernel_type = nbnxnk4x4_PlainC;
1738 if (bDoNonbonded && fp != NULL)
1740 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1741 lookup_nbnxn_kernel_name(*kernel_type),
1742 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1743 nbnxn_kernel_to_cj_size(*kernel_type));
1747 static void pick_nbnxn_resources(const t_commrec *cr,
1748 const gmx_hw_info_t *hwinfo,
1749 gmx_bool bDoNonbonded,
1751 gmx_bool *bEmulateGPU,
1752 const gmx_gpu_opt_t *gpu_opt)
1754 gmx_bool bEmulateGPUEnvVarSet;
1755 char gpu_err_str[STRLEN];
1759 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1761 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1762 * GPUs (currently) only handle non-bonded calculations, we will
1763 * automatically switch to emulation if non-bonded calculations are
1764 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1765 * way to turn off GPU initialization, data movement, and cleanup.
1767 * GPU emulation can be useful to assess the performance one can expect by
1768 * adding GPU(s) to the machine. The conditional below allows this even
1769 * if mdrun is compiled without GPU acceleration support.
1770 * Note that you should freezing the system as otherwise it will explode.
1772 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1774 gpu_opt->ncuda_dev_use > 0));
1776 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1778 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1780 /* Each PP node will use the intra-node id-th device from the
1781 * list of detected/selected GPUs. */
1782 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1783 &hwinfo->gpu_info, gpu_opt))
1785 /* At this point the init should never fail as we made sure that
1786 * we have all the GPUs we need. If it still does, we'll bail. */
1787 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1789 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1790 cr->rank_pp_intranode),
1794 /* Here we actually turn on hardware GPU acceleration */
1799 gmx_bool uses_simple_tables(int cutoff_scheme,
1800 nonbonded_verlet_t *nbv,
1803 gmx_bool bUsesSimpleTables = TRUE;
1806 switch (cutoff_scheme)
1809 bUsesSimpleTables = TRUE;
1812 assert(NULL != nbv && NULL != nbv->grp);
1813 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1814 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1817 gmx_incons("unimplemented");
1819 return bUsesSimpleTables;
1822 static void init_ewald_f_table(interaction_const_t *ic,
1823 gmx_bool bUsesSimpleTables,
1828 if (bUsesSimpleTables)
1830 /* With a spacing of 0.0005 we are at the force summation accuracy
1831 * for the SSE kernels for "normal" atomistic simulations.
1833 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1836 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1837 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1841 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1842 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1843 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1846 sfree_aligned(ic->tabq_coul_FDV0);
1847 sfree_aligned(ic->tabq_coul_F);
1848 sfree_aligned(ic->tabq_coul_V);
1850 /* Create the original table data in FDV0 */
1851 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1852 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1853 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1854 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1855 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1858 void init_interaction_const_tables(FILE *fp,
1859 interaction_const_t *ic,
1860 gmx_bool bUsesSimpleTables,
1865 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1867 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1871 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1872 1/ic->tabq_scale, ic->tabq_size);
1877 static void clear_force_switch_constants(shift_consts_t *sc)
1884 static void force_switch_constants(real p,
1888 /* Here we determine the coefficient for shifting the force to zero
1889 * between distance rsw and the cut-off rc.
1890 * For a potential of r^-p, we have force p*r^-(p+1).
1891 * But to save flops we absorb p in the coefficient.
1893 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1894 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1896 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1897 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1898 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1901 static void potential_switch_constants(real rsw, real rc,
1902 switch_consts_t *sc)
1904 /* The switch function is 1 at rsw and 0 at rc.
1905 * The derivative and second derivate are zero at both ends.
1906 * rsw = max(r - r_switch, 0)
1907 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1908 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1909 * force = force*dsw - potential*sw
1912 sc->c3 = -10*pow(rc - rsw, -3);
1913 sc->c4 = 15*pow(rc - rsw, -4);
1914 sc->c5 = -6*pow(rc - rsw, -5);
1918 init_interaction_const(FILE *fp,
1919 const t_commrec gmx_unused *cr,
1920 interaction_const_t **interaction_const,
1921 const t_forcerec *fr,
1924 interaction_const_t *ic;
1925 gmx_bool bUsesSimpleTables = TRUE;
1929 /* Just allocate something so we can free it */
1930 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1931 snew_aligned(ic->tabq_coul_F, 16, 32);
1932 snew_aligned(ic->tabq_coul_V, 16, 32);
1934 ic->rlist = fr->rlist;
1935 ic->rlistlong = fr->rlistlong;
1938 ic->vdwtype = fr->vdwtype;
1939 ic->vdw_modifier = fr->vdw_modifier;
1940 ic->rvdw = fr->rvdw;
1941 ic->rvdw_switch = fr->rvdw_switch;
1942 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1943 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1944 ic->sh_lj_ewald = 0;
1945 clear_force_switch_constants(&ic->dispersion_shift);
1946 clear_force_switch_constants(&ic->repulsion_shift);
1948 switch (ic->vdw_modifier)
1950 case eintmodPOTSHIFT:
1951 /* Only shift the potential, don't touch the force */
1952 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1953 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
1954 if (EVDW_PME(ic->vdwtype))
1958 if (fr->cutoff_scheme == ecutsGROUP)
1960 gmx_fatal(FARGS, "Potential-shift is not (yet) implemented for LJ-PME with cutoff-scheme=group");
1962 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1963 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
1966 case eintmodFORCESWITCH:
1967 /* Switch the force, switch and shift the potential */
1968 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1969 &ic->dispersion_shift);
1970 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1971 &ic->repulsion_shift);
1973 case eintmodPOTSWITCH:
1974 /* Switch the potential and force */
1975 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1979 case eintmodEXACTCUTOFF:
1980 /* Nothing to do here */
1983 gmx_incons("unimplemented potential modifier");
1986 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1988 /* Electrostatics */
1989 ic->eeltype = fr->eeltype;
1990 ic->coulomb_modifier = fr->coulomb_modifier;
1991 ic->rcoulomb = fr->rcoulomb;
1992 ic->epsilon_r = fr->epsilon_r;
1993 ic->epsfac = fr->epsfac;
1994 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1996 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1998 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2005 /* Reaction-field */
2006 if (EEL_RF(ic->eeltype))
2008 ic->epsilon_rf = fr->epsilon_rf;
2009 ic->k_rf = fr->k_rf;
2010 ic->c_rf = fr->c_rf;
2014 /* For plain cut-off we might use the reaction-field kernels */
2015 ic->epsilon_rf = ic->epsilon_r;
2017 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2019 ic->c_rf = 1/ic->rcoulomb;
2029 real dispersion_shift;
2031 dispersion_shift = ic->dispersion_shift.cpot;
2032 if (EVDW_PME(ic->vdwtype))
2034 dispersion_shift -= ic->sh_lj_ewald;
2036 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2037 ic->repulsion_shift.cpot, dispersion_shift);
2039 if (ic->eeltype == eelCUT)
2041 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2043 else if (EEL_PME(ic->eeltype))
2045 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2050 *interaction_const = ic;
2052 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2054 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2056 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2057 * also sharing texture references. To keep the code simple, we don't
2058 * treat texture references as shared resources, but this means that
2059 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2060 * Hence, to ensure that the non-bonded kernels don't start before all
2061 * texture binding operations are finished, we need to wait for all ranks
2062 * to arrive here before continuing.
2064 * Note that we could omit this barrier if GPUs are not shared (or
2065 * texture objects are used), but as this is initialization code, there
2066 * is not point in complicating things.
2068 #ifdef GMX_THREAD_MPI
2073 #endif /* GMX_THREAD_MPI */
2076 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2077 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2080 static void init_nb_verlet(FILE *fp,
2081 nonbonded_verlet_t **nb_verlet,
2082 gmx_bool bFEP_NonBonded,
2083 const t_inputrec *ir,
2084 const t_forcerec *fr,
2085 const t_commrec *cr,
2086 const char *nbpu_opt)
2088 nonbonded_verlet_t *nbv;
2091 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2093 nbnxn_alloc_t *nb_alloc;
2094 nbnxn_free_t *nb_free;
2098 pick_nbnxn_resources(cr, fr->hwinfo,
2106 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2107 for (i = 0; i < nbv->ngrp; i++)
2109 nbv->grp[i].nbl_lists.nnbl = 0;
2110 nbv->grp[i].nbat = NULL;
2111 nbv->grp[i].kernel_type = nbnxnkNotSet;
2113 if (i == 0) /* local */
2115 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2116 nbv->bUseGPU, bEmulateGPU, ir,
2117 &nbv->grp[i].kernel_type,
2118 &nbv->grp[i].ewald_excl,
2121 else /* non-local */
2123 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2125 /* Use GPU for local, select a CPU kernel for non-local */
2126 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2128 &nbv->grp[i].kernel_type,
2129 &nbv->grp[i].ewald_excl,
2132 bHybridGPURun = TRUE;
2136 /* Use the same kernel for local and non-local interactions */
2137 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2138 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2145 /* init the NxN GPU data; the last argument tells whether we'll have
2146 * both local and non-local NB calculation on GPU */
2147 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2148 &fr->hwinfo->gpu_info, fr->gpu_opt,
2149 cr->rank_pp_intranode,
2150 (nbv->ngrp > 1) && !bHybridGPURun);
2152 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2156 nbv->min_ci_balanced = strtol(env, &end, 10);
2157 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2159 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2164 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2165 nbv->min_ci_balanced);
2170 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2173 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2174 nbv->min_ci_balanced);
2180 nbv->min_ci_balanced = 0;
2185 nbnxn_init_search(&nbv->nbs,
2186 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2187 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2189 gmx_omp_nthreads_get(emntNonbonded));
2191 for (i = 0; i < nbv->ngrp; i++)
2193 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2195 nb_alloc = &pmalloc;
2204 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2205 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2206 /* 8x8x8 "non-simple" lists are ATM always combined */
2207 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2211 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2213 gmx_bool bSimpleList;
2214 int enbnxninitcombrule;
2216 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2218 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2220 /* Plain LJ cut-off: we can optimize with combination rules */
2221 enbnxninitcombrule = enbnxninitcombruleDETECT;
2223 else if (fr->vdwtype == evdwPME)
2225 /* LJ-PME: we need to use a combination rule for the grid */
2226 if (fr->ljpme_combination_rule == eljpmeGEOM)
2228 enbnxninitcombrule = enbnxninitcombruleGEOM;
2232 enbnxninitcombrule = enbnxninitcombruleLB;
2237 /* We use a full combination matrix: no rule required */
2238 enbnxninitcombrule = enbnxninitcombruleNONE;
2242 snew(nbv->grp[i].nbat, 1);
2243 nbnxn_atomdata_init(fp,
2245 nbv->grp[i].kernel_type,
2247 fr->ntype, fr->nbfp,
2249 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2254 nbv->grp[i].nbat = nbv->grp[0].nbat;
2259 void init_forcerec(FILE *fp,
2260 const output_env_t oenv,
2263 const t_inputrec *ir,
2264 const gmx_mtop_t *mtop,
2265 const t_commrec *cr,
2271 const char *nbpu_opt,
2272 gmx_bool bNoSolvOpt,
2275 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2280 gmx_bool bGenericKernelOnly;
2281 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2282 gmx_bool bFEP_NonBonded;
2284 int *nm_ind, egp_flags;
2286 if (fr->hwinfo == NULL)
2288 /* Detect hardware, gather information.
2289 * In mdrun, hwinfo has already been set before calling init_forcerec.
2290 * Here we ignore GPUs, as tools will not use them anyhow.
2292 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2295 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2296 fr->use_simd_kernels = TRUE;
2298 fr->bDomDec = DOMAINDECOMP(cr);
2300 natoms = mtop->natoms;
2302 if (check_box(ir->ePBC, box))
2304 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2307 /* Test particle insertion ? */
2310 /* Set to the size of the molecule to be inserted (the last one) */
2311 /* Because of old style topologies, we have to use the last cg
2312 * instead of the last molecule type.
2314 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2315 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2316 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2318 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2326 /* Copy AdResS parameters */
2329 fr->adress_type = ir->adress->type;
2330 fr->adress_const_wf = ir->adress->const_wf;
2331 fr->adress_ex_width = ir->adress->ex_width;
2332 fr->adress_hy_width = ir->adress->hy_width;
2333 fr->adress_icor = ir->adress->icor;
2334 fr->adress_site = ir->adress->site;
2335 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2336 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2339 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2340 for (i = 0; i < ir->adress->n_energy_grps; i++)
2342 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2345 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2346 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2347 for (i = 0; i < fr->n_adress_tf_grps; i++)
2349 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2351 copy_rvec(ir->adress->refs, fr->adress_refs);
2355 fr->adress_type = eAdressOff;
2356 fr->adress_do_hybridpairs = FALSE;
2359 /* Copy the user determined parameters */
2360 fr->userint1 = ir->userint1;
2361 fr->userint2 = ir->userint2;
2362 fr->userint3 = ir->userint3;
2363 fr->userint4 = ir->userint4;
2364 fr->userreal1 = ir->userreal1;
2365 fr->userreal2 = ir->userreal2;
2366 fr->userreal3 = ir->userreal3;
2367 fr->userreal4 = ir->userreal4;
2370 fr->fc_stepsize = ir->fc_stepsize;
2373 fr->efep = ir->efep;
2374 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2375 if (ir->fepvals->bScCoul)
2377 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2378 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2382 fr->sc_alphacoul = 0;
2383 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2385 fr->sc_power = ir->fepvals->sc_power;
2386 fr->sc_r_power = ir->fepvals->sc_r_power;
2387 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2389 env = getenv("GMX_SCSIGMA_MIN");
2393 sscanf(env, "%lf", &dbl);
2394 fr->sc_sigma6_min = pow(dbl, 6);
2397 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2401 fr->bNonbonded = TRUE;
2402 if (getenv("GMX_NO_NONBONDED") != NULL)
2404 /* turn off non-bonded calculations */
2405 fr->bNonbonded = FALSE;
2406 md_print_warn(cr, fp,
2407 "Found environment variable GMX_NO_NONBONDED.\n"
2408 "Disabling nonbonded calculations.\n");
2411 bGenericKernelOnly = FALSE;
2413 /* We now check in the NS code whether a particular combination of interactions
2414 * can be used with water optimization, and disable it if that is not the case.
2417 if (getenv("GMX_NB_GENERIC") != NULL)
2422 "Found environment variable GMX_NB_GENERIC.\n"
2423 "Disabling all interaction-specific nonbonded kernels, will only\n"
2424 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2426 bGenericKernelOnly = TRUE;
2429 if (bGenericKernelOnly == TRUE)
2434 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2436 fr->use_simd_kernels = FALSE;
2440 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2441 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2445 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2447 /* Check if we can/should do all-vs-all kernels */
2448 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2449 fr->AllvsAll_work = NULL;
2450 fr->AllvsAll_workgb = NULL;
2452 /* All-vs-all kernels have not been implemented in 4.6, and
2453 * the SIMD group kernels are also buggy in this case. Non-SIMD
2454 * group kernels are OK. See Redmine #1249. */
2457 fr->bAllvsAll = FALSE;
2458 fr->use_simd_kernels = FALSE;
2462 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2463 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2464 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2465 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2466 "we are proceeding by disabling all CPU architecture-specific\n"
2467 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2468 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2472 /* Neighbour searching stuff */
2473 fr->cutoff_scheme = ir->cutoff_scheme;
2474 fr->bGrid = (ir->ns_type == ensGRID);
2475 fr->ePBC = ir->ePBC;
2477 if (fr->cutoff_scheme == ecutsGROUP)
2479 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2480 "removed in a future release when 'verlet' supports all interaction forms.\n";
2484 fprintf(stderr, "\n%s\n", note);
2488 fprintf(fp, "\n%s\n", note);
2492 /* Determine if we will do PBC for distances in bonded interactions */
2493 if (fr->ePBC == epbcNONE)
2495 fr->bMolPBC = FALSE;
2499 if (!DOMAINDECOMP(cr))
2501 /* The group cut-off scheme and SHAKE assume charge groups
2502 * are whole, but not using molpbc is faster in most cases.
2504 if (fr->cutoff_scheme == ecutsGROUP ||
2505 (ir->eConstrAlg == econtSHAKE &&
2506 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2507 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2509 fr->bMolPBC = ir->bPeriodicMols;
2514 if (getenv("GMX_USE_GRAPH") != NULL)
2516 fr->bMolPBC = FALSE;
2519 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2526 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2529 fr->bGB = (ir->implicit_solvent == eisGBSA);
2531 fr->rc_scaling = ir->refcoord_scaling;
2532 copy_rvec(ir->posres_com, fr->posres_com);
2533 copy_rvec(ir->posres_comB, fr->posres_comB);
2534 fr->rlist = cutoff_inf(ir->rlist);
2535 fr->rlistlong = cutoff_inf(ir->rlistlong);
2536 fr->eeltype = ir->coulombtype;
2537 fr->vdwtype = ir->vdwtype;
2538 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2540 fr->coulomb_modifier = ir->coulomb_modifier;
2541 fr->vdw_modifier = ir->vdw_modifier;
2543 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2544 switch (fr->eeltype)
2547 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2553 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2557 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2558 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2567 case eelPMEUSERSWITCH:
2568 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2573 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2577 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2581 /* Vdw: Translate from mdp settings to kernel format */
2582 switch (fr->vdwtype)
2588 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2592 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2599 case evdwENCADSHIFT:
2600 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2604 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2608 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2609 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2610 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2612 fr->bTwinRange = fr->rlistlong > fr->rlist;
2613 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2615 fr->reppow = mtop->ffparams.reppow;
2617 if (ir->cutoff_scheme == ecutsGROUP)
2619 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2620 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2621 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2622 fr->bcoultab = !(fr->eeltype == eelCUT ||
2623 fr->eeltype == eelEWALD ||
2624 fr->eeltype == eelPME ||
2625 fr->eeltype == eelRF ||
2626 fr->eeltype == eelRF_ZERO);
2628 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2629 * going to be faster to tabulate the interaction than calling the generic kernel.
2631 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2633 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2635 fr->bcoultab = TRUE;
2638 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2639 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2640 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2641 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2643 if (fr->rcoulomb != fr->rvdw)
2645 fr->bcoultab = TRUE;
2649 if (getenv("GMX_REQUIRE_TABLES"))
2652 fr->bcoultab = TRUE;
2657 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2658 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2661 if (fr->bvdwtab == TRUE)
2663 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2664 fr->nbkernel_vdw_modifier = eintmodNONE;
2666 if (fr->bcoultab == TRUE)
2668 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2669 fr->nbkernel_elec_modifier = eintmodNONE;
2673 if (ir->cutoff_scheme == ecutsVERLET)
2675 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2677 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2679 fr->bvdwtab = FALSE;
2680 fr->bcoultab = FALSE;
2683 /* Tables are used for direct ewald sum */
2686 if (EEL_PME(ir->coulombtype))
2690 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2692 if (ir->coulombtype == eelP3M_AD)
2694 please_cite(fp, "Hockney1988");
2695 please_cite(fp, "Ballenegger2012");
2699 please_cite(fp, "Essmann95a");
2702 if (ir->ewald_geometry == eewg3DC)
2706 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2708 please_cite(fp, "In-Chul99a");
2711 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2712 init_ewald_tab(&(fr->ewald_table), ir, fp);
2715 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2716 1/fr->ewaldcoeff_q);
2720 if (EVDW_PME(ir->vdwtype))
2724 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2726 please_cite(fp, "Essmann95a");
2727 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2730 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2731 1/fr->ewaldcoeff_lj);
2735 /* Electrostatics */
2736 fr->epsilon_r = ir->epsilon_r;
2737 fr->epsilon_rf = ir->epsilon_rf;
2738 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2739 fr->rcoulomb_switch = ir->rcoulomb_switch;
2740 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2742 /* Parameters for generalized RF */
2746 if (fr->eeltype == eelGRF)
2748 init_generalized_rf(fp, mtop, ir, fr);
2751 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2752 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2753 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2754 IR_ELEC_FIELD(*ir) ||
2755 (fr->adress_icor != eAdressICOff)
2758 if (fr->cutoff_scheme == ecutsGROUP &&
2759 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2761 /* Count the total number of charge groups */
2762 fr->cg_nalloc = ncg_mtop(mtop);
2763 srenew(fr->cg_cm, fr->cg_nalloc);
2765 if (fr->shift_vec == NULL)
2767 snew(fr->shift_vec, SHIFTS);
2770 if (fr->fshift == NULL)
2772 snew(fr->fshift, SHIFTS);
2775 if (fr->nbfp == NULL)
2777 fr->ntype = mtop->ffparams.atnr;
2778 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2781 /* Copy the energy group exclusions */
2782 fr->egp_flags = ir->opts.egp_flags;
2784 /* Van der Waals stuff */
2785 fr->rvdw = cutoff_inf(ir->rvdw);
2786 fr->rvdw_switch = ir->rvdw_switch;
2787 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2789 if (fr->rvdw_switch >= fr->rvdw)
2791 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2792 fr->rvdw_switch, fr->rvdw);
2796 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2797 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2798 fr->rvdw_switch, fr->rvdw);
2802 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2804 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2807 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2809 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2814 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2815 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2818 fr->eDispCorr = ir->eDispCorr;
2819 if (ir->eDispCorr != edispcNO)
2821 set_avcsixtwelve(fp, fr, mtop);
2826 set_bham_b_max(fp, fr, mtop);
2829 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2831 /* Copy the GBSA data (radius, volume and surftens for each
2832 * atomtype) from the topology atomtype section to forcerec.
2834 snew(fr->atype_radius, fr->ntype);
2835 snew(fr->atype_vol, fr->ntype);
2836 snew(fr->atype_surftens, fr->ntype);
2837 snew(fr->atype_gb_radius, fr->ntype);
2838 snew(fr->atype_S_hct, fr->ntype);
2840 if (mtop->atomtypes.nr > 0)
2842 for (i = 0; i < fr->ntype; i++)
2844 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2846 for (i = 0; i < fr->ntype; i++)
2848 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2850 for (i = 0; i < fr->ntype; i++)
2852 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2854 for (i = 0; i < fr->ntype; i++)
2856 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2858 for (i = 0; i < fr->ntype; i++)
2860 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2864 /* Generate the GB table if needed */
2868 fr->gbtabscale = 2000;
2870 fr->gbtabscale = 500;
2874 fr->gbtab = make_gb_table(oenv, fr);
2876 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2878 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2879 if (!DOMAINDECOMP(cr))
2881 make_local_gb(cr, fr->born, ir->gb_algorithm);
2885 /* Set the charge scaling */
2886 if (fr->epsilon_r != 0)
2888 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2892 /* eps = 0 is infinite dieletric: no coulomb interactions */
2896 /* Reaction field constants */
2897 if (EEL_RF(fr->eeltype))
2899 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2900 fr->rcoulomb, fr->temp, fr->zsquare, box,
2901 &fr->kappa, &fr->k_rf, &fr->c_rf);
2904 /*This now calculates sum for q and c6*/
2905 set_chargesum(fp, fr, mtop);
2907 /* if we are using LR electrostatics, and they are tabulated,
2908 * the tables will contain modified coulomb interactions.
2909 * Since we want to use the non-shifted ones for 1-4
2910 * coulombic interactions, we must have an extra set of tables.
2913 /* Construct tables.
2914 * A little unnecessary to make both vdw and coul tables sometimes,
2915 * but what the heck... */
2917 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2918 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2920 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2921 fr->bBHAM || fr->bEwald) &&
2922 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2923 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2924 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2926 negp_pp = ir->opts.ngener - ir->nwall;
2930 bSomeNormalNbListsAreInUse = TRUE;
2935 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2936 for (egi = 0; egi < negp_pp; egi++)
2938 for (egj = egi; egj < negp_pp; egj++)
2940 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2941 if (!(egp_flags & EGP_EXCL))
2943 if (egp_flags & EGP_TABLE)
2949 bSomeNormalNbListsAreInUse = TRUE;
2954 if (bSomeNormalNbListsAreInUse)
2956 fr->nnblists = negptable + 1;
2960 fr->nnblists = negptable;
2962 if (fr->nnblists > 1)
2964 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2973 snew(fr->nblists, fr->nnblists);
2975 /* This code automatically gives table length tabext without cut-off's,
2976 * in that case grompp should already have checked that we do not need
2977 * normal tables and we only generate tables for 1-4 interactions.
2979 rtab = ir->rlistlong + ir->tabext;
2983 /* make tables for ordinary interactions */
2984 if (bSomeNormalNbListsAreInUse)
2986 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2989 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2991 if (!bMakeSeparate14Table)
2993 fr->tab14 = fr->nblists[0].table_elec_vdw;
3003 /* Read the special tables for certain energy group pairs */
3004 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3005 for (egi = 0; egi < negp_pp; egi++)
3007 for (egj = egi; egj < negp_pp; egj++)
3009 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3010 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3012 nbl = &(fr->nblists[m]);
3013 if (fr->nnblists > 1)
3015 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3017 /* Read the table file with the two energy groups names appended */
3018 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3019 *mtop->groups.grpname[nm_ind[egi]],
3020 *mtop->groups.grpname[nm_ind[egj]],
3024 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3025 *mtop->groups.grpname[nm_ind[egi]],
3026 *mtop->groups.grpname[nm_ind[egj]],
3027 &fr->nblists[fr->nnblists/2+m]);
3031 else if (fr->nnblists > 1)
3033 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3039 if (bMakeSeparate14Table)
3041 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3042 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3043 GMX_MAKETABLES_14ONLY);
3046 /* Read AdResS Thermo Force table if needed */
3047 if (fr->adress_icor == eAdressICThermoForce)
3049 /* old todo replace */
3051 if (ir->adress->n_tf_grps > 0)
3053 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3058 /* load the default table */
3059 snew(fr->atf_tabs, 1);
3060 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3065 fr->nwall = ir->nwall;
3066 if (ir->nwall && ir->wall_type == ewtTABLE)
3068 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3073 fcd->bondtab = make_bonded_tables(fp,
3074 F_TABBONDS, F_TABBONDSNC,
3076 fcd->angletab = make_bonded_tables(fp,
3079 fcd->dihtab = make_bonded_tables(fp,
3087 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3091 /* QM/MM initialization if requested
3095 fprintf(stderr, "QM/MM calculation requested.\n");
3098 fr->bQMMM = ir->bQMMM;
3099 fr->qr = mk_QMMMrec();
3101 /* Set all the static charge group info */
3102 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3104 &fr->bExcl_IntraCGAll_InterCGNone);
3105 if (DOMAINDECOMP(cr))
3111 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3114 if (!DOMAINDECOMP(cr))
3116 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3117 mtop->natoms, mtop->natoms, mtop->natoms);
3120 fr->print_force = print_force;
3123 /* coarse load balancing vars */
3128 /* Initialize neighbor search */
3129 init_ns(fp, cr, &fr->ns, fr, mtop);
3131 if (cr->duty & DUTY_PP)
3133 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3137 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3142 /* Initialize the thread working data for bonded interactions */
3143 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3145 snew(fr->excl_load, fr->nthreads+1);
3147 if (fr->cutoff_scheme == ecutsVERLET)
3149 if (ir->rcoulomb != ir->rvdw)
3151 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3154 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3157 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3158 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3160 if (ir->eDispCorr != edispcNO)
3162 calc_enervirdiff(fp, ir->eDispCorr, fr);
3166 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3167 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3168 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3170 void pr_forcerec(FILE *fp, t_forcerec *fr)
3174 pr_real(fp, fr->rlist);
3175 pr_real(fp, fr->rcoulomb);
3176 pr_real(fp, fr->fudgeQQ);
3177 pr_bool(fp, fr->bGrid);
3178 pr_bool(fp, fr->bTwinRange);
3179 /*pr_int(fp,fr->cg0);
3180 pr_int(fp,fr->hcg);*/
3181 for (i = 0; i < fr->nnblists; i++)
3183 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3185 pr_real(fp, fr->rcoulomb_switch);
3186 pr_real(fp, fr->rcoulomb);
3191 void forcerec_set_excl_load(t_forcerec *fr,
3192 const gmx_localtop_t *top)
3195 int t, i, j, ntot, n, ntarget;
3197 ind = top->excls.index;
3201 for (i = 0; i < top->excls.nr; i++)
3203 for (j = ind[i]; j < ind[i+1]; j++)
3212 fr->excl_load[0] = 0;
3215 for (t = 1; t <= fr->nthreads; t++)
3217 ntarget = (ntot*t)/fr->nthreads;
3218 while (i < top->excls.nr && n < ntarget)
3220 for (j = ind[i]; j < ind[i+1]; j++)
3229 fr->excl_load[t] = i;