2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/utilities.h"
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
70 #include "mtop_util.h"
71 #include "nbnxn_simd.h"
72 #include "nbnxn_search.h"
73 #include "nbnxn_atomdata.h"
74 #include "nbnxn_consts.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gmx_detect_hardware.h"
80 /* MSVC definition for __cpuid() */
84 #include "types/nbnxn_cuda_types_ext.h"
85 #include "gpu_utils.h"
86 #include "nbnxn_cuda_data_mgmt.h"
87 #include "pmalloc_cuda.h"
89 t_forcerec *mk_forcerec(void)
99 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
103 for (i = 0; (i < atnr); i++)
105 for (j = 0; (j < atnr); j++)
107 fprintf(fp, "%2d - %2d", i, j);
110 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
111 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
115 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
116 C12(nbfp, atnr, i, j)/12.0);
123 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
131 snew(nbfp, 3*atnr*atnr);
132 for (i = k = 0; (i < atnr); i++)
134 for (j = 0; (j < atnr); j++, k++)
136 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
137 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
138 /* nbfp now includes the 6.0 derivative prefactor */
139 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
145 snew(nbfp, 2*atnr*atnr);
146 for (i = k = 0; (i < atnr); i++)
148 for (j = 0; (j < atnr); j++, k++)
150 /* nbfp now includes the 6.0/12.0 derivative prefactors */
151 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
152 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
160 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
164 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
168 snew(nbfp, 2*atnr*atnr);
169 for (i = 0; i < atnr; ++i)
171 for (j = 0; j < atnr; ++j)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 c12 = sqrt(c12i * c12j);
179 if (comb_rule == eCOMB_ARITHMETIC
180 && !gmx_numzero(c6) && !gmx_numzero(c12))
182 sigmai = pow(c12i / c6i, 1.0/6.0);
183 sigmaj = pow(c12j / c6j, 1.0/6.0);
184 epsi = c6i * c6i / c12i;
185 epsj = c6j * c6j / c12j;
186 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
187 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
189 C6(nbfp, atnr, i, j) = c6*6.0;
190 C12(nbfp, atnr, i, j) = c12*12.0;
196 /* This routine sets fr->solvent_opt to the most common solvent in the
197 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
198 * the fr->solvent_type array with the correct type (or esolNO).
200 * Charge groups that fulfill the conditions but are not identical to the
201 * most common one will be marked as esolNO in the solvent_type array.
203 * TIP3p is identical to SPC for these purposes, so we call it
204 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
206 * NOTE: QM particle should not
207 * become an optimized solvent. Not even if there is only one charge
217 } solvent_parameters_t;
220 check_solvent_cg(const gmx_moltype_t *molt,
223 const unsigned char *qm_grpnr,
224 const t_grps *qm_grps,
226 int *n_solvent_parameters,
227 solvent_parameters_t **solvent_parameters_p,
231 const t_blocka *excl;
238 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
239 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
242 solvent_parameters_t *solvent_parameters;
244 /* We use a list with parameters for each solvent type.
245 * Every time we discover a new molecule that fulfills the basic
246 * conditions for a solvent we compare with the previous entries
247 * in these lists. If the parameters are the same we just increment
248 * the counter for that type, and otherwise we create a new type
249 * based on the current molecule.
251 * Once we've finished going through all molecules we check which
252 * solvent is most common, and mark all those molecules while we
253 * clear the flag on all others.
256 solvent_parameters = *solvent_parameters_p;
258 /* Mark the cg first as non optimized */
261 /* Check if this cg has no exclusions with atoms in other charge groups
262 * and all atoms inside the charge group excluded.
263 * We only have 3 or 4 atom solvent loops.
265 if (GET_CGINFO_EXCL_INTER(cginfo) ||
266 !GET_CGINFO_EXCL_INTRA(cginfo))
271 /* Get the indices of the first atom in this charge group */
272 j0 = molt->cgs.index[cg0];
273 j1 = molt->cgs.index[cg0+1];
275 /* Number of atoms in our molecule */
281 "Moltype '%s': there are %d atoms in this charge group\n",
285 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
288 if (nj < 3 || nj > 4)
293 /* Check if we are doing QM on this group */
295 if (qm_grpnr != NULL)
297 for (j = j0; j < j1 && !qm; j++)
299 qm = (qm_grpnr[j] < qm_grps->nr - 1);
302 /* Cannot use solvent optimization with QM */
308 atom = molt->atoms.atom;
310 /* Still looks like a solvent, time to check parameters */
312 /* If it is perturbed (free energy) we can't use the solvent loops,
313 * so then we just skip to the next molecule.
317 for (j = j0; j < j1 && !perturbed; j++)
319 perturbed = PERTURBED(atom[j]);
327 /* Now it's only a question if the VdW and charge parameters
328 * are OK. Before doing the check we compare and see if they are
329 * identical to a possible previous solvent type.
330 * First we assign the current types and charges.
332 for (j = 0; j < nj; j++)
334 tmp_vdwtype[j] = atom[j0+j].type;
335 tmp_charge[j] = atom[j0+j].q;
338 /* Does it match any previous solvent type? */
339 for (k = 0; k < *n_solvent_parameters; k++)
344 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
345 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
346 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
351 /* Check that types & charges match for all atoms in molecule */
352 for (j = 0; j < nj && match == TRUE; j++)
354 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
358 if (tmp_charge[j] != solvent_parameters[k].charge[j])
365 /* Congratulations! We have a matched solvent.
366 * Flag it with this type for later processing.
369 solvent_parameters[k].count += nmol;
371 /* We are done with this charge group */
376 /* If we get here, we have a tentative new solvent type.
377 * Before we add it we must check that it fulfills the requirements
378 * of the solvent optimized loops. First determine which atoms have
381 for (j = 0; j < nj; j++)
384 tjA = tmp_vdwtype[j];
386 /* Go through all other tpes and see if any have non-zero
387 * VdW parameters when combined with this one.
389 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
391 /* We already checked that the atoms weren't perturbed,
392 * so we only need to check state A now.
396 has_vdw[j] = (has_vdw[j] ||
397 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
399 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
404 has_vdw[j] = (has_vdw[j] ||
405 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
406 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
411 /* Now we know all we need to make the final check and assignment. */
415 * For this we require thatn all atoms have charge,
416 * the charges on atom 2 & 3 should be the same, and only
417 * atom 1 might have VdW.
419 if (has_vdw[1] == FALSE &&
420 has_vdw[2] == FALSE &&
421 tmp_charge[0] != 0 &&
422 tmp_charge[1] != 0 &&
423 tmp_charge[2] == tmp_charge[1])
425 srenew(solvent_parameters, *n_solvent_parameters+1);
426 solvent_parameters[*n_solvent_parameters].model = esolSPC;
427 solvent_parameters[*n_solvent_parameters].count = nmol;
428 for (k = 0; k < 3; k++)
430 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
431 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
434 *cg_sp = *n_solvent_parameters;
435 (*n_solvent_parameters)++;
440 /* Or could it be a TIP4P?
441 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
442 * Only atom 1 mght have VdW.
444 if (has_vdw[1] == FALSE &&
445 has_vdw[2] == FALSE &&
446 has_vdw[3] == FALSE &&
447 tmp_charge[0] == 0 &&
448 tmp_charge[1] != 0 &&
449 tmp_charge[2] == tmp_charge[1] &&
452 srenew(solvent_parameters, *n_solvent_parameters+1);
453 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
454 solvent_parameters[*n_solvent_parameters].count = nmol;
455 for (k = 0; k < 4; k++)
457 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
458 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
461 *cg_sp = *n_solvent_parameters;
462 (*n_solvent_parameters)++;
466 *solvent_parameters_p = solvent_parameters;
470 check_solvent(FILE * fp,
471 const gmx_mtop_t * mtop,
473 cginfo_mb_t *cginfo_mb)
476 const t_block * mols;
477 const gmx_moltype_t *molt;
478 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
479 int n_solvent_parameters;
480 solvent_parameters_t *solvent_parameters;
486 fprintf(debug, "Going to determine what solvent types we have.\n");
491 n_solvent_parameters = 0;
492 solvent_parameters = NULL;
493 /* Allocate temporary array for solvent type */
494 snew(cg_sp, mtop->nmolblock);
498 for (mb = 0; mb < mtop->nmolblock; mb++)
500 molt = &mtop->moltype[mtop->molblock[mb].type];
502 /* Here we have to loop over all individual molecules
503 * because we need to check for QMMM particles.
505 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
506 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
507 nmol = mtop->molblock[mb].nmol/nmol_ch;
508 for (mol = 0; mol < nmol_ch; mol++)
511 am = mol*cgs->index[cgs->nr];
512 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
514 check_solvent_cg(molt, cg_mol, nmol,
515 mtop->groups.grpnr[egcQMMM] ?
516 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
517 &mtop->groups.grps[egcQMMM],
519 &n_solvent_parameters, &solvent_parameters,
520 cginfo_mb[mb].cginfo[cgm+cg_mol],
521 &cg_sp[mb][cgm+cg_mol]);
524 cg_offset += cgs->nr;
525 at_offset += cgs->index[cgs->nr];
528 /* Puh! We finished going through all charge groups.
529 * Now find the most common solvent model.
532 /* Most common solvent this far */
534 for (i = 0; i < n_solvent_parameters; i++)
537 solvent_parameters[i].count > solvent_parameters[bestsp].count)
545 bestsol = solvent_parameters[bestsp].model;
552 #ifdef DISABLE_WATER_NLIST
557 for (mb = 0; mb < mtop->nmolblock; mb++)
559 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
560 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
561 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
563 if (cg_sp[mb][i] == bestsp)
565 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
570 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
577 if (bestsol != esolNO && fp != NULL)
579 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
581 solvent_parameters[bestsp].count);
584 sfree(solvent_parameters);
585 fr->solvent_opt = bestsol;
589 acNONE = 0, acCONSTRAINT, acSETTLE
592 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
593 t_forcerec *fr, gmx_bool bNoSolvOpt,
594 gmx_bool *bFEP_NonBonded,
595 gmx_bool *bExcl_IntraCGAll_InterCGNone)
598 const t_blocka *excl;
599 const gmx_moltype_t *molt;
600 const gmx_molblock_t *molb;
601 cginfo_mb_t *cginfo_mb;
604 int cg_offset, a_offset, cgm, am;
605 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
609 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bFEP;
611 ncg_tot = ncg_mtop(mtop);
612 snew(cginfo_mb, mtop->nmolblock);
614 snew(type_VDW, fr->ntype);
615 for (ai = 0; ai < fr->ntype; ai++)
617 type_VDW[ai] = FALSE;
618 for (j = 0; j < fr->ntype; j++)
620 type_VDW[ai] = type_VDW[ai] ||
622 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
623 C12(fr->nbfp, fr->ntype, ai, j) != 0;
627 *bFEP_NonBonded = FALSE;
628 *bExcl_IntraCGAll_InterCGNone = TRUE;
631 snew(bExcl, excl_nalloc);
634 for (mb = 0; mb < mtop->nmolblock; mb++)
636 molb = &mtop->molblock[mb];
637 molt = &mtop->moltype[molb->type];
641 /* Check if the cginfo is identical for all molecules in this block.
642 * If so, we only need an array of the size of one molecule.
643 * Otherwise we make an array of #mol times #cgs per molecule.
647 for (m = 0; m < molb->nmol; m++)
649 am = m*cgs->index[cgs->nr];
650 for (cg = 0; cg < cgs->nr; cg++)
653 a1 = cgs->index[cg+1];
654 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
655 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
659 if (mtop->groups.grpnr[egcQMMM] != NULL)
661 for (ai = a0; ai < a1; ai++)
663 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
664 mtop->groups.grpnr[egcQMMM][a_offset +ai])
673 cginfo_mb[mb].cg_start = cg_offset;
674 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
675 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
676 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
677 cginfo = cginfo_mb[mb].cginfo;
679 /* Set constraints flags for constrained atoms */
680 snew(a_con, molt->atoms.nr);
681 for (ftype = 0; ftype < F_NRE; ftype++)
683 if (interaction_function[ftype].flags & IF_CONSTRAINT)
688 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
692 for (a = 0; a < nral; a++)
694 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
695 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
701 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
704 am = m*cgs->index[cgs->nr];
705 for (cg = 0; cg < cgs->nr; cg++)
708 a1 = cgs->index[cg+1];
710 /* Store the energy group in cginfo */
711 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
712 SET_CGINFO_GID(cginfo[cgm+cg], gid);
714 /* Check the intra/inter charge group exclusions */
715 if (a1-a0 > excl_nalloc)
717 excl_nalloc = a1 - a0;
718 srenew(bExcl, excl_nalloc);
720 /* bExclIntraAll: all intra cg interactions excluded
721 * bExclInter: any inter cg interactions excluded
723 bExclIntraAll = TRUE;
728 for (ai = a0; ai < a1; ai++)
730 /* Check VDW and electrostatic interactions */
731 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
732 type_VDW[molt->atoms.atom[ai].typeB]);
733 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
734 molt->atoms.atom[ai].qB != 0);
736 bFEP = bFEP || (PERTURBED(molt->atoms.atom[ai]) != 0);
738 /* Clear the exclusion list for atom ai */
739 for (aj = a0; aj < a1; aj++)
741 bExcl[aj-a0] = FALSE;
743 /* Loop over all the exclusions of atom ai */
744 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
747 if (aj < a0 || aj >= a1)
756 /* Check if ai excludes a0 to a1 */
757 for (aj = a0; aj < a1; aj++)
761 bExclIntraAll = FALSE;
768 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
771 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
779 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
783 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
785 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
787 /* The size in cginfo is currently only read with DD */
788 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
792 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
796 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
800 SET_CGINFO_FEP(cginfo[cgm+cg]);
801 *bFEP_NonBonded = TRUE;
803 /* Store the charge group size */
804 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
806 if (!bExclIntraAll || bExclInter)
808 *bExcl_IntraCGAll_InterCGNone = FALSE;
815 cg_offset += molb->nmol*cgs->nr;
816 a_offset += molb->nmol*cgs->index[cgs->nr];
820 /* the solvent optimizer is called after the QM is initialized,
821 * because we don't want to have the QM subsystemto become an
825 check_solvent(fplog, mtop, fr, cginfo_mb);
827 if (getenv("GMX_NO_SOLV_OPT"))
831 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
832 "Disabling all solvent optimization\n");
834 fr->solvent_opt = esolNO;
838 fr->solvent_opt = esolNO;
840 if (!fr->solvent_opt)
842 for (mb = 0; mb < mtop->nmolblock; mb++)
844 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
846 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
854 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
859 ncg = cgi_mb[nmb-1].cg_end;
862 for (cg = 0; cg < ncg; cg++)
864 while (cg >= cgi_mb[mb].cg_end)
869 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
875 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
877 /*This now calculates sum for q and c6*/
878 double qsum, q2sum, q, c6sum, c6;
880 const t_atoms *atoms;
885 for (mb = 0; mb < mtop->nmolblock; mb++)
887 nmol = mtop->molblock[mb].nmol;
888 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
889 for (i = 0; i < atoms->nr; i++)
891 q = atoms->atom[i].q;
894 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
899 fr->q2sum[0] = q2sum;
900 fr->c6sum[0] = c6sum;
902 if (fr->efep != efepNO)
907 for (mb = 0; mb < mtop->nmolblock; mb++)
909 nmol = mtop->molblock[mb].nmol;
910 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
911 for (i = 0; i < atoms->nr; i++)
913 q = atoms->atom[i].qB;
916 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
920 fr->q2sum[1] = q2sum;
921 fr->c6sum[1] = c6sum;
926 fr->qsum[1] = fr->qsum[0];
927 fr->q2sum[1] = fr->q2sum[0];
928 fr->c6sum[1] = fr->c6sum[0];
932 if (fr->efep == efepNO)
934 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
938 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
939 fr->qsum[0], fr->qsum[1]);
944 void update_forcerec(t_forcerec *fr, matrix box)
946 if (fr->eeltype == eelGRF)
948 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
949 fr->rcoulomb, fr->temp, fr->zsquare, box,
950 &fr->kappa, &fr->k_rf, &fr->c_rf);
954 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
956 const t_atoms *atoms, *atoms_tpi;
957 const t_blocka *excl;
958 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
959 gmx_int64_t npair, npair_ij, tmpi, tmpj;
960 double csix, ctwelve;
964 real *nbfp_comb = NULL;
970 /* For LJ-PME, we want to correct for the difference between the
971 * actual C6 values and the C6 values used by the LJ-PME based on
972 * combination rules. */
974 if (EVDW_PME(fr->vdwtype))
976 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
977 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
978 for (tpi = 0; tpi < ntp; ++tpi)
980 for (tpj = 0; tpj < ntp; ++tpj)
982 C6(nbfp_comb, ntp, tpi, tpj) =
983 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
984 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
989 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
997 /* Count the types so we avoid natoms^2 operations */
998 snew(typecount, ntp);
999 gmx_mtop_count_atomtypes(mtop, q, typecount);
1001 for (tpi = 0; tpi < ntp; tpi++)
1003 for (tpj = tpi; tpj < ntp; tpj++)
1005 tmpi = typecount[tpi];
1006 tmpj = typecount[tpj];
1009 npair_ij = tmpi*tmpj;
1013 npair_ij = tmpi*(tmpi - 1)/2;
1017 /* nbfp now includes the 6.0 derivative prefactor */
1018 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1022 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1023 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1024 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1030 /* Subtract the excluded pairs.
1031 * The main reason for substracting exclusions is that in some cases
1032 * some combinations might never occur and the parameters could have
1033 * any value. These unused values should not influence the dispersion
1036 for (mb = 0; mb < mtop->nmolblock; mb++)
1038 nmol = mtop->molblock[mb].nmol;
1039 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1040 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1041 for (i = 0; (i < atoms->nr); i++)
1045 tpi = atoms->atom[i].type;
1049 tpi = atoms->atom[i].typeB;
1051 j1 = excl->index[i];
1052 j2 = excl->index[i+1];
1053 for (j = j1; j < j2; j++)
1060 tpj = atoms->atom[k].type;
1064 tpj = atoms->atom[k].typeB;
1068 /* nbfp now includes the 6.0 derivative prefactor */
1069 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1073 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1074 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1075 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1085 /* Only correct for the interaction of the test particle
1086 * with the rest of the system.
1089 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1092 for (mb = 0; mb < mtop->nmolblock; mb++)
1094 nmol = mtop->molblock[mb].nmol;
1095 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1096 for (j = 0; j < atoms->nr; j++)
1099 /* Remove the interaction of the test charge group
1102 if (mb == mtop->nmolblock-1)
1106 if (mb == 0 && nmol == 1)
1108 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1113 tpj = atoms->atom[j].type;
1117 tpj = atoms->atom[j].typeB;
1119 for (i = 0; i < fr->n_tpi; i++)
1123 tpi = atoms_tpi->atom[i].type;
1127 tpi = atoms_tpi->atom[i].typeB;
1131 /* nbfp now includes the 6.0 derivative prefactor */
1132 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1136 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1137 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1138 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1145 if (npair - nexcl <= 0 && fplog)
1147 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1153 csix /= npair - nexcl;
1154 ctwelve /= npair - nexcl;
1158 fprintf(debug, "Counted %d exclusions\n", nexcl);
1159 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1160 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1162 fr->avcsix[q] = csix;
1163 fr->avctwelve[q] = ctwelve;
1166 if (EVDW_PME(fr->vdwtype))
1173 if (fr->eDispCorr == edispcAllEner ||
1174 fr->eDispCorr == edispcAllEnerPres)
1176 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1177 fr->avcsix[0], fr->avctwelve[0]);
1181 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1187 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1188 const gmx_mtop_t *mtop)
1190 const t_atoms *at1, *at2;
1191 int mt1, mt2, i, j, tpi, tpj, ntypes;
1197 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1204 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1206 at1 = &mtop->moltype[mt1].atoms;
1207 for (i = 0; (i < at1->nr); i++)
1209 tpi = at1->atom[i].type;
1212 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1215 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1217 at2 = &mtop->moltype[mt2].atoms;
1218 for (j = 0; (j < at2->nr); j++)
1220 tpj = at2->atom[j].type;
1223 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1225 b = BHAMB(nbfp, ntypes, tpi, tpj);
1226 if (b > fr->bham_b_max)
1230 if ((b < bmin) || (bmin == -1))
1240 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1241 bmin, fr->bham_b_max);
1245 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1246 t_forcerec *fr, real rtab,
1247 const t_commrec *cr,
1248 const char *tabfn, char *eg1, char *eg2,
1258 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1263 sprintf(buf, "%s", tabfn);
1266 /* Append the two energy group names */
1267 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1268 eg1, eg2, ftp2ext(efXVG));
1270 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1271 /* Copy the contents of the table to separate coulomb and LJ tables too,
1272 * to improve cache performance.
1274 /* For performance reasons we want
1275 * the table data to be aligned to 16-byte. The pointers could be freed
1276 * but currently aren't.
1278 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1279 nbl->table_elec.format = nbl->table_elec_vdw.format;
1280 nbl->table_elec.r = nbl->table_elec_vdw.r;
1281 nbl->table_elec.n = nbl->table_elec_vdw.n;
1282 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1283 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1284 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1285 nbl->table_elec.ninteractions = 1;
1286 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1287 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1289 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1290 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1291 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1292 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1293 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1294 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1295 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1296 nbl->table_vdw.ninteractions = 2;
1297 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1298 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1300 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1302 for (j = 0; j < 4; j++)
1304 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1306 for (j = 0; j < 8; j++)
1308 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1313 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1314 int *ncount, int **count)
1316 const gmx_moltype_t *molt;
1318 int mt, ftype, stride, i, j, tabnr;
1320 for (mt = 0; mt < mtop->nmoltype; mt++)
1322 molt = &mtop->moltype[mt];
1323 for (ftype = 0; ftype < F_NRE; ftype++)
1325 if (ftype == ftype1 || ftype == ftype2)
1327 il = &molt->ilist[ftype];
1328 stride = 1 + NRAL(ftype);
1329 for (i = 0; i < il->nr; i += stride)
1331 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1334 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1336 if (tabnr >= *ncount)
1338 srenew(*count, tabnr+1);
1339 for (j = *ncount; j < tabnr+1; j++)
1352 static bondedtable_t *make_bonded_tables(FILE *fplog,
1353 int ftype1, int ftype2,
1354 const gmx_mtop_t *mtop,
1355 const char *basefn, const char *tabext)
1357 int i, ncount, *count;
1365 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1370 for (i = 0; i < ncount; i++)
1374 sprintf(tabfn, "%s", basefn);
1375 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1376 tabext, i, ftp2ext(efXVG));
1377 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1386 void forcerec_set_ranges(t_forcerec *fr,
1387 int ncg_home, int ncg_force,
1389 int natoms_force_constr, int natoms_f_novirsum)
1394 /* fr->ncg_force is unused in the standard code,
1395 * but it can be useful for modified code dealing with charge groups.
1397 fr->ncg_force = ncg_force;
1398 fr->natoms_force = natoms_force;
1399 fr->natoms_force_constr = natoms_force_constr;
1401 if (fr->natoms_force_constr > fr->nalloc_force)
1403 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1407 srenew(fr->f_twin, fr->nalloc_force);
1411 if (fr->bF_NoVirSum)
1413 fr->f_novirsum_n = natoms_f_novirsum;
1414 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1416 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1417 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1422 fr->f_novirsum_n = 0;
1426 static real cutoff_inf(real cutoff)
1430 cutoff = GMX_CUTOFF_INF;
1436 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1437 t_forcerec *fr, const t_inputrec *ir,
1438 const char *tabfn, const gmx_mtop_t *mtop,
1446 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1450 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1452 sprintf(buf, "%s", tabfn);
1453 for (i = 0; i < ir->adress->n_tf_grps; i++)
1455 j = ir->adress->tf_table_index[i]; /* get energy group index */
1456 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1457 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1460 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1462 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1467 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1474 ir->rcoulomb == 0 &&
1476 ir->ePBC == epbcNONE &&
1477 ir->vdwtype == evdwCUT &&
1478 ir->coulombtype == eelCUT &&
1479 ir->efep == efepNO &&
1480 (ir->implicit_solvent == eisNO ||
1481 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1482 ir->gb_algorithm == egbHCT ||
1483 ir->gb_algorithm == egbOBC))) &&
1484 getenv("GMX_NO_ALLVSALL") == NULL
1487 if (bAllvsAll && ir->opts.ngener > 1)
1489 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1495 fprintf(stderr, "\n%s\n", note);
1499 fprintf(fp, "\n%s\n", note);
1505 if (bAllvsAll && fp && MASTER(cr))
1507 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1514 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1518 /* These thread local data structures are used for bondeds only */
1519 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1521 if (fr->nthreads > 1)
1523 snew(fr->f_t, fr->nthreads);
1524 /* Thread 0 uses the global force and energy arrays */
1525 for (t = 1; t < fr->nthreads; t++)
1527 fr->f_t[t].f = NULL;
1528 fr->f_t[t].f_nalloc = 0;
1529 snew(fr->f_t[t].fshift, SHIFTS);
1530 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1531 for (i = 0; i < egNR; i++)
1533 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1540 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1541 const t_commrec *cr,
1542 const t_inputrec *ir,
1545 /* TODO: remove these GPU specific restrictions by implementing CUDA kernels */
1548 if (ir->vdw_modifier == eintmodFORCESWITCH ||
1549 ir->vdw_modifier == eintmodPOTSWITCH ||
1550 ir->vdwtype == evdwPME)
1552 md_print_warn(cr, fplog, "LJ switch functions and LJ-PME are not yet supported on the GPU, falling back to CPU only\n");
1557 if (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB)
1559 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1560 bGPU ? "GPUs" : "SIMD kernels",
1561 bGPU ? "CPU only" : "plain-C kernels");
1569 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1573 *kernel_type = nbnxnk4x4_PlainC;
1574 *ewald_excl = ewaldexclTable;
1576 #ifdef GMX_NBNXN_SIMD
1578 #ifdef GMX_NBNXN_SIMD_4XN
1579 *kernel_type = nbnxnk4xN_SIMD_4xN;
1581 #ifdef GMX_NBNXN_SIMD_2XNN
1582 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1585 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1586 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1587 * Currently this is based on the SIMD acceleration choice,
1588 * but it might be better to decide this at runtime based on CPU.
1590 * 4xN calculates more (zero) interactions, but has less pair-search
1591 * work and much better kernel instruction scheduling.
1593 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1594 * which doesn't have FMA, both the analytical and tabulated Ewald
1595 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1596 * 2x(4+4) because it results in significantly fewer pairs.
1597 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1598 * 10% with HT, 50% without HT. As we currently don't detect the actual
1599 * use of HT, use 4x8 to avoid a potential performance hit.
1600 * On Intel Haswell 4x8 is always faster.
1602 *kernel_type = nbnxnk4xN_SIMD_4xN;
1604 #ifndef GMX_SIMD_HAVE_FMA
1605 if (EEL_PME_EWALD(ir->coulombtype) ||
1606 EVDW_PME(ir->vdwtype))
1608 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1609 * There are enough instructions to make 2x(4+4) efficient.
1611 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1614 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1617 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1619 #ifdef GMX_NBNXN_SIMD_4XN
1620 *kernel_type = nbnxnk4xN_SIMD_4xN;
1622 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1625 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1627 #ifdef GMX_NBNXN_SIMD_2XNN
1628 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1630 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1634 /* Analytical Ewald exclusion correction is only an option in
1636 * Since table lookup's don't parallelize with SIMD, analytical
1637 * will probably always be faster for a SIMD width of 8 or more.
1638 * With FMA analytical is sometimes faster for a width if 4 as well.
1639 * On BlueGene/Q, this is faster regardless of precision.
1640 * In single precision, this is faster on Bulldozer.
1642 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1643 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1644 defined GMX_SIMD_IBM_QPX
1645 *ewald_excl = ewaldexclAnalytical;
1647 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1649 *ewald_excl = ewaldexclTable;
1651 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1653 *ewald_excl = ewaldexclAnalytical;
1657 #endif /* GMX_NBNXN_SIMD */
1661 const char *lookup_nbnxn_kernel_name(int kernel_type)
1663 const char *returnvalue = NULL;
1664 switch (kernel_type)
1667 returnvalue = "not set";
1669 case nbnxnk4x4_PlainC:
1670 returnvalue = "plain C";
1672 case nbnxnk4xN_SIMD_4xN:
1673 case nbnxnk4xN_SIMD_2xNN:
1674 #ifdef GMX_NBNXN_SIMD
1675 #if defined GMX_SIMD_X86_SSE2
1676 returnvalue = "SSE2";
1677 #elif defined GMX_SIMD_X86_SSE4_1
1678 returnvalue = "SSE4.1";
1679 #elif defined GMX_SIMD_X86_AVX_128_FMA
1680 returnvalue = "AVX_128_FMA";
1681 #elif defined GMX_SIMD_X86_AVX_256
1682 returnvalue = "AVX_256";
1683 #elif defined GMX_SIMD_X86_AVX2_256
1684 returnvalue = "AVX2_256";
1686 returnvalue = "SIMD";
1688 #else /* GMX_NBNXN_SIMD */
1689 returnvalue = "not available";
1690 #endif /* GMX_NBNXN_SIMD */
1692 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1693 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1697 gmx_fatal(FARGS, "Illegal kernel type selected");
1704 static void pick_nbnxn_kernel(FILE *fp,
1705 const t_commrec *cr,
1706 gmx_bool use_simd_kernels,
1708 gmx_bool bEmulateGPU,
1709 const t_inputrec *ir,
1712 gmx_bool bDoNonbonded)
1714 assert(kernel_type);
1716 *kernel_type = nbnxnkNotSet;
1717 *ewald_excl = ewaldexclTable;
1721 *kernel_type = nbnxnk8x8x8_PlainC;
1725 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1730 *kernel_type = nbnxnk8x8x8_CUDA;
1733 if (*kernel_type == nbnxnkNotSet)
1735 /* LJ PME with LB combination rule does 7 mesh operations.
1736 * This so slow that we don't compile SIMD non-bonded kernels for that.
1738 if (use_simd_kernels &&
1739 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1741 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1745 *kernel_type = nbnxnk4x4_PlainC;
1749 if (bDoNonbonded && fp != NULL)
1751 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1752 lookup_nbnxn_kernel_name(*kernel_type),
1753 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1754 nbnxn_kernel_to_cj_size(*kernel_type));
1758 static void pick_nbnxn_resources(const t_commrec *cr,
1759 const gmx_hw_info_t *hwinfo,
1760 gmx_bool bDoNonbonded,
1762 gmx_bool *bEmulateGPU,
1763 const gmx_gpu_opt_t *gpu_opt)
1765 gmx_bool bEmulateGPUEnvVarSet;
1766 char gpu_err_str[STRLEN];
1770 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1772 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1773 * GPUs (currently) only handle non-bonded calculations, we will
1774 * automatically switch to emulation if non-bonded calculations are
1775 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1776 * way to turn off GPU initialization, data movement, and cleanup.
1778 * GPU emulation can be useful to assess the performance one can expect by
1779 * adding GPU(s) to the machine. The conditional below allows this even
1780 * if mdrun is compiled without GPU acceleration support.
1781 * Note that you should freezing the system as otherwise it will explode.
1783 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1785 gpu_opt->ncuda_dev_use > 0));
1787 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1789 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1791 /* Each PP node will use the intra-node id-th device from the
1792 * list of detected/selected GPUs. */
1793 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1794 &hwinfo->gpu_info, gpu_opt))
1796 /* At this point the init should never fail as we made sure that
1797 * we have all the GPUs we need. If it still does, we'll bail. */
1798 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1800 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1801 cr->rank_pp_intranode),
1805 /* Here we actually turn on hardware GPU acceleration */
1810 gmx_bool uses_simple_tables(int cutoff_scheme,
1811 nonbonded_verlet_t *nbv,
1814 gmx_bool bUsesSimpleTables = TRUE;
1817 switch (cutoff_scheme)
1820 bUsesSimpleTables = TRUE;
1823 assert(NULL != nbv && NULL != nbv->grp);
1824 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1825 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1828 gmx_incons("unimplemented");
1830 return bUsesSimpleTables;
1833 static void init_ewald_f_table(interaction_const_t *ic,
1834 gmx_bool bUsesSimpleTables,
1839 if (bUsesSimpleTables)
1841 /* With a spacing of 0.0005 we are at the force summation accuracy
1842 * for the SSE kernels for "normal" atomistic simulations.
1844 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1847 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1848 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1852 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1853 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1854 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1857 sfree_aligned(ic->tabq_coul_FDV0);
1858 sfree_aligned(ic->tabq_coul_F);
1859 sfree_aligned(ic->tabq_coul_V);
1861 /* Create the original table data in FDV0 */
1862 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1863 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1864 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1865 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1866 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1869 void init_interaction_const_tables(FILE *fp,
1870 interaction_const_t *ic,
1871 gmx_bool bUsesSimpleTables,
1876 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1878 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1882 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1883 1/ic->tabq_scale, ic->tabq_size);
1888 static void clear_force_switch_constants(shift_consts_t *sc)
1895 static void force_switch_constants(real p,
1899 /* Here we determine the coefficient for shifting the force to zero
1900 * between distance rsw and the cut-off rc.
1901 * For a potential of r^-p, we have force p*r^-(p+1).
1902 * But to save flops we absorb p in the coefficient.
1904 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1905 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1907 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1908 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1909 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1912 static void potential_switch_constants(real rsw, real rc,
1913 switch_consts_t *sc)
1915 /* The switch function is 1 at rsw and 0 at rc.
1916 * The derivative and second derivate are zero at both ends.
1917 * rsw = max(r - r_switch, 0)
1918 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1919 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1920 * force = force*dsw - potential*sw
1923 sc->c3 = -10*pow(rc - rsw, -3);
1924 sc->c4 = 15*pow(rc - rsw, -4);
1925 sc->c5 = -6*pow(rc - rsw, -5);
1929 init_interaction_const(FILE *fp,
1930 const t_commrec gmx_unused *cr,
1931 interaction_const_t **interaction_const,
1932 const t_forcerec *fr,
1935 interaction_const_t *ic;
1936 gmx_bool bUsesSimpleTables = TRUE;
1940 /* Just allocate something so we can free it */
1941 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1942 snew_aligned(ic->tabq_coul_F, 16, 32);
1943 snew_aligned(ic->tabq_coul_V, 16, 32);
1945 ic->rlist = fr->rlist;
1946 ic->rlistlong = fr->rlistlong;
1949 ic->vdwtype = fr->vdwtype;
1950 ic->vdw_modifier = fr->vdw_modifier;
1951 ic->rvdw = fr->rvdw;
1952 ic->rvdw_switch = fr->rvdw_switch;
1953 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1954 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1955 ic->sh_lj_ewald = 0;
1956 clear_force_switch_constants(&ic->dispersion_shift);
1957 clear_force_switch_constants(&ic->repulsion_shift);
1959 switch (ic->vdw_modifier)
1961 case eintmodPOTSHIFT:
1962 /* Only shift the potential, don't touch the force */
1963 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1964 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
1965 if (EVDW_PME(ic->vdwtype))
1969 if (fr->cutoff_scheme == ecutsGROUP)
1971 gmx_fatal(FARGS, "Potential-shift is not (yet) implemented for LJ-PME with cutoff-scheme=group");
1973 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1974 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
1977 case eintmodFORCESWITCH:
1978 /* Switch the force, switch and shift the potential */
1979 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1980 &ic->dispersion_shift);
1981 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1982 &ic->repulsion_shift);
1984 case eintmodPOTSWITCH:
1985 /* Switch the potential and force */
1986 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1990 case eintmodEXACTCUTOFF:
1991 /* Nothing to do here */
1994 gmx_incons("unimplemented potential modifier");
1997 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1999 /* Electrostatics */
2000 ic->eeltype = fr->eeltype;
2001 ic->coulomb_modifier = fr->coulomb_modifier;
2002 ic->rcoulomb = fr->rcoulomb;
2003 ic->epsilon_r = fr->epsilon_r;
2004 ic->epsfac = fr->epsfac;
2005 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2007 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2009 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2016 /* Reaction-field */
2017 if (EEL_RF(ic->eeltype))
2019 ic->epsilon_rf = fr->epsilon_rf;
2020 ic->k_rf = fr->k_rf;
2021 ic->c_rf = fr->c_rf;
2025 /* For plain cut-off we might use the reaction-field kernels */
2026 ic->epsilon_rf = ic->epsilon_r;
2028 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2030 ic->c_rf = 1/ic->rcoulomb;
2040 real dispersion_shift;
2042 dispersion_shift = ic->dispersion_shift.cpot;
2043 if (EVDW_PME(ic->vdwtype))
2045 dispersion_shift -= ic->sh_lj_ewald;
2047 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2048 ic->repulsion_shift.cpot, dispersion_shift);
2050 if (ic->eeltype == eelCUT)
2052 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2054 else if (EEL_PME(ic->eeltype))
2056 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2061 *interaction_const = ic;
2063 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2065 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2067 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2068 * also sharing texture references. To keep the code simple, we don't
2069 * treat texture references as shared resources, but this means that
2070 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2071 * Hence, to ensure that the non-bonded kernels don't start before all
2072 * texture binding operations are finished, we need to wait for all ranks
2073 * to arrive here before continuing.
2075 * Note that we could omit this barrier if GPUs are not shared (or
2076 * texture objects are used), but as this is initialization code, there
2077 * is not point in complicating things.
2079 #ifdef GMX_THREAD_MPI
2084 #endif /* GMX_THREAD_MPI */
2087 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2088 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2091 static void init_nb_verlet(FILE *fp,
2092 nonbonded_verlet_t **nb_verlet,
2093 gmx_bool bFEP_NonBonded,
2094 const t_inputrec *ir,
2095 const t_forcerec *fr,
2096 const t_commrec *cr,
2097 const char *nbpu_opt)
2099 nonbonded_verlet_t *nbv;
2102 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2104 nbnxn_alloc_t *nb_alloc;
2105 nbnxn_free_t *nb_free;
2109 pick_nbnxn_resources(cr, fr->hwinfo,
2117 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2118 for (i = 0; i < nbv->ngrp; i++)
2120 nbv->grp[i].nbl_lists.nnbl = 0;
2121 nbv->grp[i].nbat = NULL;
2122 nbv->grp[i].kernel_type = nbnxnkNotSet;
2124 if (i == 0) /* local */
2126 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2127 nbv->bUseGPU, bEmulateGPU, ir,
2128 &nbv->grp[i].kernel_type,
2129 &nbv->grp[i].ewald_excl,
2132 else /* non-local */
2134 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2136 /* Use GPU for local, select a CPU kernel for non-local */
2137 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2139 &nbv->grp[i].kernel_type,
2140 &nbv->grp[i].ewald_excl,
2143 bHybridGPURun = TRUE;
2147 /* Use the same kernel for local and non-local interactions */
2148 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2149 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2156 /* init the NxN GPU data; the last argument tells whether we'll have
2157 * both local and non-local NB calculation on GPU */
2158 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2159 &fr->hwinfo->gpu_info, fr->gpu_opt,
2160 cr->rank_pp_intranode,
2161 (nbv->ngrp > 1) && !bHybridGPURun);
2163 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2167 nbv->min_ci_balanced = strtol(env, &end, 10);
2168 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2170 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2175 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2176 nbv->min_ci_balanced);
2181 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2184 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2185 nbv->min_ci_balanced);
2191 nbv->min_ci_balanced = 0;
2196 nbnxn_init_search(&nbv->nbs,
2197 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2198 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2200 gmx_omp_nthreads_get(emntNonbonded));
2202 for (i = 0; i < nbv->ngrp; i++)
2204 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2206 nb_alloc = &pmalloc;
2215 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2216 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2217 /* 8x8x8 "non-simple" lists are ATM always combined */
2218 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2222 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2224 gmx_bool bSimpleList;
2225 int enbnxninitcombrule;
2227 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2229 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2231 /* Plain LJ cut-off: we can optimize with combination rules */
2232 enbnxninitcombrule = enbnxninitcombruleDETECT;
2234 else if (fr->vdwtype == evdwPME)
2236 /* LJ-PME: we need to use a combination rule for the grid */
2237 if (fr->ljpme_combination_rule == eljpmeGEOM)
2239 enbnxninitcombrule = enbnxninitcombruleGEOM;
2243 enbnxninitcombrule = enbnxninitcombruleLB;
2248 /* We use a full combination matrix: no rule required */
2249 enbnxninitcombrule = enbnxninitcombruleNONE;
2253 snew(nbv->grp[i].nbat, 1);
2254 nbnxn_atomdata_init(fp,
2256 nbv->grp[i].kernel_type,
2258 fr->ntype, fr->nbfp,
2260 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2265 nbv->grp[i].nbat = nbv->grp[0].nbat;
2270 void init_forcerec(FILE *fp,
2271 const output_env_t oenv,
2274 const t_inputrec *ir,
2275 const gmx_mtop_t *mtop,
2276 const t_commrec *cr,
2282 const char *nbpu_opt,
2283 gmx_bool bNoSolvOpt,
2286 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2291 gmx_bool bGenericKernelOnly;
2292 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2293 gmx_bool bFEP_NonBonded;
2295 int *nm_ind, egp_flags;
2297 if (fr->hwinfo == NULL)
2299 /* Detect hardware, gather information.
2300 * In mdrun, hwinfo has already been set before calling init_forcerec.
2301 * Here we ignore GPUs, as tools will not use them anyhow.
2303 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2306 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2307 fr->use_simd_kernels = TRUE;
2309 fr->bDomDec = DOMAINDECOMP(cr);
2311 natoms = mtop->natoms;
2313 if (check_box(ir->ePBC, box))
2315 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2318 /* Test particle insertion ? */
2321 /* Set to the size of the molecule to be inserted (the last one) */
2322 /* Because of old style topologies, we have to use the last cg
2323 * instead of the last molecule type.
2325 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2326 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2327 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2329 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2337 /* Copy AdResS parameters */
2340 fr->adress_type = ir->adress->type;
2341 fr->adress_const_wf = ir->adress->const_wf;
2342 fr->adress_ex_width = ir->adress->ex_width;
2343 fr->adress_hy_width = ir->adress->hy_width;
2344 fr->adress_icor = ir->adress->icor;
2345 fr->adress_site = ir->adress->site;
2346 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2347 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2350 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2351 for (i = 0; i < ir->adress->n_energy_grps; i++)
2353 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2356 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2357 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2358 for (i = 0; i < fr->n_adress_tf_grps; i++)
2360 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2362 copy_rvec(ir->adress->refs, fr->adress_refs);
2366 fr->adress_type = eAdressOff;
2367 fr->adress_do_hybridpairs = FALSE;
2370 /* Copy the user determined parameters */
2371 fr->userint1 = ir->userint1;
2372 fr->userint2 = ir->userint2;
2373 fr->userint3 = ir->userint3;
2374 fr->userint4 = ir->userint4;
2375 fr->userreal1 = ir->userreal1;
2376 fr->userreal2 = ir->userreal2;
2377 fr->userreal3 = ir->userreal3;
2378 fr->userreal4 = ir->userreal4;
2381 fr->fc_stepsize = ir->fc_stepsize;
2384 fr->efep = ir->efep;
2385 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2386 if (ir->fepvals->bScCoul)
2388 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2389 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2393 fr->sc_alphacoul = 0;
2394 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2396 fr->sc_power = ir->fepvals->sc_power;
2397 fr->sc_r_power = ir->fepvals->sc_r_power;
2398 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2400 env = getenv("GMX_SCSIGMA_MIN");
2404 sscanf(env, "%lf", &dbl);
2405 fr->sc_sigma6_min = pow(dbl, 6);
2408 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2412 fr->bNonbonded = TRUE;
2413 if (getenv("GMX_NO_NONBONDED") != NULL)
2415 /* turn off non-bonded calculations */
2416 fr->bNonbonded = FALSE;
2417 md_print_warn(cr, fp,
2418 "Found environment variable GMX_NO_NONBONDED.\n"
2419 "Disabling nonbonded calculations.\n");
2422 bGenericKernelOnly = FALSE;
2424 /* We now check in the NS code whether a particular combination of interactions
2425 * can be used with water optimization, and disable it if that is not the case.
2428 if (getenv("GMX_NB_GENERIC") != NULL)
2433 "Found environment variable GMX_NB_GENERIC.\n"
2434 "Disabling all interaction-specific nonbonded kernels, will only\n"
2435 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2437 bGenericKernelOnly = TRUE;
2440 if (bGenericKernelOnly == TRUE)
2445 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2447 fr->use_simd_kernels = FALSE;
2451 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2452 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2456 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2458 /* Check if we can/should do all-vs-all kernels */
2459 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2460 fr->AllvsAll_work = NULL;
2461 fr->AllvsAll_workgb = NULL;
2463 /* All-vs-all kernels have not been implemented in 4.6, and
2464 * the SIMD group kernels are also buggy in this case. Non-SIMD
2465 * group kernels are OK. See Redmine #1249. */
2468 fr->bAllvsAll = FALSE;
2469 fr->use_simd_kernels = FALSE;
2473 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2474 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2475 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2476 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2477 "we are proceeding by disabling all CPU architecture-specific\n"
2478 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2479 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2483 /* Neighbour searching stuff */
2484 fr->cutoff_scheme = ir->cutoff_scheme;
2485 fr->bGrid = (ir->ns_type == ensGRID);
2486 fr->ePBC = ir->ePBC;
2488 if (fr->cutoff_scheme == ecutsGROUP)
2490 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2491 "removed in a future release when 'verlet' supports all interaction forms.\n";
2495 fprintf(stderr, "\n%s\n", note);
2499 fprintf(fp, "\n%s\n", note);
2503 /* Determine if we will do PBC for distances in bonded interactions */
2504 if (fr->ePBC == epbcNONE)
2506 fr->bMolPBC = FALSE;
2510 if (!DOMAINDECOMP(cr))
2512 /* The group cut-off scheme and SHAKE assume charge groups
2513 * are whole, but not using molpbc is faster in most cases.
2515 if (fr->cutoff_scheme == ecutsGROUP ||
2516 (ir->eConstrAlg == econtSHAKE &&
2517 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2518 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2520 fr->bMolPBC = ir->bPeriodicMols;
2525 if (getenv("GMX_USE_GRAPH") != NULL)
2527 fr->bMolPBC = FALSE;
2530 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2537 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2540 fr->bGB = (ir->implicit_solvent == eisGBSA);
2542 fr->rc_scaling = ir->refcoord_scaling;
2543 copy_rvec(ir->posres_com, fr->posres_com);
2544 copy_rvec(ir->posres_comB, fr->posres_comB);
2545 fr->rlist = cutoff_inf(ir->rlist);
2546 fr->rlistlong = cutoff_inf(ir->rlistlong);
2547 fr->eeltype = ir->coulombtype;
2548 fr->vdwtype = ir->vdwtype;
2549 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2551 fr->coulomb_modifier = ir->coulomb_modifier;
2552 fr->vdw_modifier = ir->vdw_modifier;
2554 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2555 switch (fr->eeltype)
2558 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2564 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2568 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2569 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2578 case eelPMEUSERSWITCH:
2579 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2584 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2588 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2592 /* Vdw: Translate from mdp settings to kernel format */
2593 switch (fr->vdwtype)
2599 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2603 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2610 case evdwENCADSHIFT:
2611 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2615 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2619 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2620 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2621 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2623 fr->bTwinRange = fr->rlistlong > fr->rlist;
2624 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2626 fr->reppow = mtop->ffparams.reppow;
2628 if (ir->cutoff_scheme == ecutsGROUP)
2630 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2631 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2632 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2633 fr->bcoultab = !(fr->eeltype == eelCUT ||
2634 fr->eeltype == eelEWALD ||
2635 fr->eeltype == eelPME ||
2636 fr->eeltype == eelRF ||
2637 fr->eeltype == eelRF_ZERO);
2639 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2640 * going to be faster to tabulate the interaction than calling the generic kernel.
2642 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2644 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2646 fr->bcoultab = TRUE;
2649 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2650 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2651 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2652 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2654 if (fr->rcoulomb != fr->rvdw)
2656 fr->bcoultab = TRUE;
2660 if (getenv("GMX_REQUIRE_TABLES"))
2663 fr->bcoultab = TRUE;
2668 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2669 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2672 if (fr->bvdwtab == TRUE)
2674 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2675 fr->nbkernel_vdw_modifier = eintmodNONE;
2677 if (fr->bcoultab == TRUE)
2679 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2680 fr->nbkernel_elec_modifier = eintmodNONE;
2684 if (ir->cutoff_scheme == ecutsVERLET)
2686 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2688 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2690 fr->bvdwtab = FALSE;
2691 fr->bcoultab = FALSE;
2694 /* Tables are used for direct ewald sum */
2697 if (EEL_PME(ir->coulombtype))
2701 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2703 if (ir->coulombtype == eelP3M_AD)
2705 please_cite(fp, "Hockney1988");
2706 please_cite(fp, "Ballenegger2012");
2710 please_cite(fp, "Essmann95a");
2713 if (ir->ewald_geometry == eewg3DC)
2717 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2719 please_cite(fp, "In-Chul99a");
2722 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2723 init_ewald_tab(&(fr->ewald_table), ir, fp);
2726 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2727 1/fr->ewaldcoeff_q);
2731 if (EVDW_PME(ir->vdwtype))
2735 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2737 please_cite(fp, "Essmann95a");
2738 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2741 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2742 1/fr->ewaldcoeff_lj);
2746 /* Electrostatics */
2747 fr->epsilon_r = ir->epsilon_r;
2748 fr->epsilon_rf = ir->epsilon_rf;
2749 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2750 fr->rcoulomb_switch = ir->rcoulomb_switch;
2751 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2753 /* Parameters for generalized RF */
2757 if (fr->eeltype == eelGRF)
2759 init_generalized_rf(fp, mtop, ir, fr);
2762 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2763 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2764 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2765 IR_ELEC_FIELD(*ir) ||
2766 (fr->adress_icor != eAdressICOff)
2769 if (fr->cutoff_scheme == ecutsGROUP &&
2770 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2772 /* Count the total number of charge groups */
2773 fr->cg_nalloc = ncg_mtop(mtop);
2774 srenew(fr->cg_cm, fr->cg_nalloc);
2776 if (fr->shift_vec == NULL)
2778 snew(fr->shift_vec, SHIFTS);
2781 if (fr->fshift == NULL)
2783 snew(fr->fshift, SHIFTS);
2786 if (fr->nbfp == NULL)
2788 fr->ntype = mtop->ffparams.atnr;
2789 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2792 /* Copy the energy group exclusions */
2793 fr->egp_flags = ir->opts.egp_flags;
2795 /* Van der Waals stuff */
2796 fr->rvdw = cutoff_inf(ir->rvdw);
2797 fr->rvdw_switch = ir->rvdw_switch;
2798 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2800 if (fr->rvdw_switch >= fr->rvdw)
2802 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2803 fr->rvdw_switch, fr->rvdw);
2807 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2808 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2809 fr->rvdw_switch, fr->rvdw);
2813 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2815 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2818 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2820 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2825 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2826 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2829 fr->eDispCorr = ir->eDispCorr;
2830 if (ir->eDispCorr != edispcNO)
2832 set_avcsixtwelve(fp, fr, mtop);
2837 set_bham_b_max(fp, fr, mtop);
2840 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2842 /* Copy the GBSA data (radius, volume and surftens for each
2843 * atomtype) from the topology atomtype section to forcerec.
2845 snew(fr->atype_radius, fr->ntype);
2846 snew(fr->atype_vol, fr->ntype);
2847 snew(fr->atype_surftens, fr->ntype);
2848 snew(fr->atype_gb_radius, fr->ntype);
2849 snew(fr->atype_S_hct, fr->ntype);
2851 if (mtop->atomtypes.nr > 0)
2853 for (i = 0; i < fr->ntype; i++)
2855 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2857 for (i = 0; i < fr->ntype; i++)
2859 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2861 for (i = 0; i < fr->ntype; i++)
2863 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2865 for (i = 0; i < fr->ntype; i++)
2867 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2869 for (i = 0; i < fr->ntype; i++)
2871 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2875 /* Generate the GB table if needed */
2879 fr->gbtabscale = 2000;
2881 fr->gbtabscale = 500;
2885 fr->gbtab = make_gb_table(oenv, fr);
2887 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2889 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2890 if (!DOMAINDECOMP(cr))
2892 make_local_gb(cr, fr->born, ir->gb_algorithm);
2896 /* Set the charge scaling */
2897 if (fr->epsilon_r != 0)
2899 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2903 /* eps = 0 is infinite dieletric: no coulomb interactions */
2907 /* Reaction field constants */
2908 if (EEL_RF(fr->eeltype))
2910 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2911 fr->rcoulomb, fr->temp, fr->zsquare, box,
2912 &fr->kappa, &fr->k_rf, &fr->c_rf);
2915 /*This now calculates sum for q and c6*/
2916 set_chargesum(fp, fr, mtop);
2918 /* if we are using LR electrostatics, and they are tabulated,
2919 * the tables will contain modified coulomb interactions.
2920 * Since we want to use the non-shifted ones for 1-4
2921 * coulombic interactions, we must have an extra set of tables.
2924 /* Construct tables.
2925 * A little unnecessary to make both vdw and coul tables sometimes,
2926 * but what the heck... */
2928 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2929 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2931 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2932 fr->bBHAM || fr->bEwald) &&
2933 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2934 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2935 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2937 negp_pp = ir->opts.ngener - ir->nwall;
2941 bSomeNormalNbListsAreInUse = TRUE;
2946 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2947 for (egi = 0; egi < negp_pp; egi++)
2949 for (egj = egi; egj < negp_pp; egj++)
2951 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2952 if (!(egp_flags & EGP_EXCL))
2954 if (egp_flags & EGP_TABLE)
2960 bSomeNormalNbListsAreInUse = TRUE;
2965 if (bSomeNormalNbListsAreInUse)
2967 fr->nnblists = negptable + 1;
2971 fr->nnblists = negptable;
2973 if (fr->nnblists > 1)
2975 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2984 snew(fr->nblists, fr->nnblists);
2986 /* This code automatically gives table length tabext without cut-off's,
2987 * in that case grompp should already have checked that we do not need
2988 * normal tables and we only generate tables for 1-4 interactions.
2990 rtab = ir->rlistlong + ir->tabext;
2994 /* make tables for ordinary interactions */
2995 if (bSomeNormalNbListsAreInUse)
2997 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3000 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3002 if (!bMakeSeparate14Table)
3004 fr->tab14 = fr->nblists[0].table_elec_vdw;
3014 /* Read the special tables for certain energy group pairs */
3015 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3016 for (egi = 0; egi < negp_pp; egi++)
3018 for (egj = egi; egj < negp_pp; egj++)
3020 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3021 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3023 nbl = &(fr->nblists[m]);
3024 if (fr->nnblists > 1)
3026 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3028 /* Read the table file with the two energy groups names appended */
3029 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3030 *mtop->groups.grpname[nm_ind[egi]],
3031 *mtop->groups.grpname[nm_ind[egj]],
3035 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3036 *mtop->groups.grpname[nm_ind[egi]],
3037 *mtop->groups.grpname[nm_ind[egj]],
3038 &fr->nblists[fr->nnblists/2+m]);
3042 else if (fr->nnblists > 1)
3044 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3050 if (bMakeSeparate14Table)
3052 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3053 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3054 GMX_MAKETABLES_14ONLY);
3057 /* Read AdResS Thermo Force table if needed */
3058 if (fr->adress_icor == eAdressICThermoForce)
3060 /* old todo replace */
3062 if (ir->adress->n_tf_grps > 0)
3064 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3069 /* load the default table */
3070 snew(fr->atf_tabs, 1);
3071 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3076 fr->nwall = ir->nwall;
3077 if (ir->nwall && ir->wall_type == ewtTABLE)
3079 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3084 fcd->bondtab = make_bonded_tables(fp,
3085 F_TABBONDS, F_TABBONDSNC,
3087 fcd->angletab = make_bonded_tables(fp,
3090 fcd->dihtab = make_bonded_tables(fp,
3098 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3102 /* QM/MM initialization if requested
3106 fprintf(stderr, "QM/MM calculation requested.\n");
3109 fr->bQMMM = ir->bQMMM;
3110 fr->qr = mk_QMMMrec();
3112 /* Set all the static charge group info */
3113 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3115 &fr->bExcl_IntraCGAll_InterCGNone);
3116 if (DOMAINDECOMP(cr))
3122 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3125 if (!DOMAINDECOMP(cr))
3127 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3128 mtop->natoms, mtop->natoms, mtop->natoms);
3131 fr->print_force = print_force;
3134 /* coarse load balancing vars */
3139 /* Initialize neighbor search */
3140 init_ns(fp, cr, &fr->ns, fr, mtop);
3142 if (cr->duty & DUTY_PP)
3144 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3148 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3153 /* Initialize the thread working data for bonded interactions */
3154 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3156 snew(fr->excl_load, fr->nthreads+1);
3158 if (fr->cutoff_scheme == ecutsVERLET)
3160 if (ir->rcoulomb != ir->rvdw)
3162 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3165 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3168 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3169 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3171 if (ir->eDispCorr != edispcNO)
3173 calc_enervirdiff(fp, ir->eDispCorr, fr);
3177 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3178 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3179 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3181 void pr_forcerec(FILE *fp, t_forcerec *fr)
3185 pr_real(fp, fr->rlist);
3186 pr_real(fp, fr->rcoulomb);
3187 pr_real(fp, fr->fudgeQQ);
3188 pr_bool(fp, fr->bGrid);
3189 pr_bool(fp, fr->bTwinRange);
3190 /*pr_int(fp,fr->cg0);
3191 pr_int(fp,fr->hcg);*/
3192 for (i = 0; i < fr->nnblists; i++)
3194 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3196 pr_real(fp, fr->rcoulomb_switch);
3197 pr_real(fp, fr->rcoulomb);
3202 void forcerec_set_excl_load(t_forcerec *fr,
3203 const gmx_localtop_t *top)
3206 int t, i, j, ntot, n, ntarget;
3208 ind = top->excls.index;
3212 for (i = 0; i < top->excls.nr; i++)
3214 for (j = ind[i]; j < ind[i+1]; j++)
3223 fr->excl_load[0] = 0;
3226 for (t = 1; t <= fr->nthreads; t++)
3228 ntarget = (ntot*t)/fr->nthreads;
3229 while (i < top->excls.nr && n < ntarget)
3231 for (j = ind[i]; j < ind[i+1]; j++)
3240 fr->excl_load[t] = i;