2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/math/utilities.h"
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
71 #include "mtop_util.h"
72 #include "nbnxn_search.h"
73 #include "nbnxn_atomdata.h"
74 #include "nbnxn_consts.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gmx_detect_hardware.h"
80 /* MSVC definition for __cpuid() */
84 #include "types/nbnxn_cuda_types_ext.h"
85 #include "gpu_utils.h"
86 #include "nbnxn_cuda_data_mgmt.h"
87 #include "pmalloc_cuda.h"
89 t_forcerec *mk_forcerec(void)
99 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
103 for (i = 0; (i < atnr); i++)
105 for (j = 0; (j < atnr); j++)
107 fprintf(fp, "%2d - %2d", i, j);
110 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
111 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
115 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
116 C12(nbfp, atnr, i, j)/12.0);
123 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
131 snew(nbfp, 3*atnr*atnr);
132 for (i = k = 0; (i < atnr); i++)
134 for (j = 0; (j < atnr); j++, k++)
136 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
137 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
138 /* nbfp now includes the 6.0 derivative prefactor */
139 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
145 snew(nbfp, 2*atnr*atnr);
146 for (i = k = 0; (i < atnr); i++)
148 for (j = 0; (j < atnr); j++, k++)
150 /* nbfp now includes the 6.0/12.0 derivative prefactors */
151 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
152 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
160 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
164 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
168 snew(nbfp, 2*atnr*atnr);
169 for (i = 0; i < atnr; ++i)
171 for (j = 0; j < atnr; ++j)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 c12 = sqrt(c12i * c12j);
179 if (comb_rule == eCOMB_ARITHMETIC
180 && !gmx_numzero(c6) && !gmx_numzero(c12))
182 sigmai = pow(c12i / c6i, 1.0/6.0);
183 sigmaj = pow(c12j / c6j, 1.0/6.0);
184 epsi = c6i * c6i / c12i;
185 epsj = c6j * c6j / c12j;
186 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
187 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
189 C6(nbfp, atnr, i, j) = c6*6.0;
190 C12(nbfp, atnr, i, j) = c12*12.0;
196 /* This routine sets fr->solvent_opt to the most common solvent in the
197 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
198 * the fr->solvent_type array with the correct type (or esolNO).
200 * Charge groups that fulfill the conditions but are not identical to the
201 * most common one will be marked as esolNO in the solvent_type array.
203 * TIP3p is identical to SPC for these purposes, so we call it
204 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
206 * NOTE: QM particle should not
207 * become an optimized solvent. Not even if there is only one charge
217 } solvent_parameters_t;
220 check_solvent_cg(const gmx_moltype_t *molt,
223 const unsigned char *qm_grpnr,
224 const t_grps *qm_grps,
226 int *n_solvent_parameters,
227 solvent_parameters_t **solvent_parameters_p,
231 const t_blocka * excl;
242 solvent_parameters_t *solvent_parameters;
244 /* We use a list with parameters for each solvent type.
245 * Every time we discover a new molecule that fulfills the basic
246 * conditions for a solvent we compare with the previous entries
247 * in these lists. If the parameters are the same we just increment
248 * the counter for that type, and otherwise we create a new type
249 * based on the current molecule.
251 * Once we've finished going through all molecules we check which
252 * solvent is most common, and mark all those molecules while we
253 * clear the flag on all others.
256 solvent_parameters = *solvent_parameters_p;
258 /* Mark the cg first as non optimized */
261 /* Check if this cg has no exclusions with atoms in other charge groups
262 * and all atoms inside the charge group excluded.
263 * We only have 3 or 4 atom solvent loops.
265 if (GET_CGINFO_EXCL_INTER(cginfo) ||
266 !GET_CGINFO_EXCL_INTRA(cginfo))
271 /* Get the indices of the first atom in this charge group */
272 j0 = molt->cgs.index[cg0];
273 j1 = molt->cgs.index[cg0+1];
275 /* Number of atoms in our molecule */
281 "Moltype '%s': there are %d atoms in this charge group\n",
285 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
288 if (nj < 3 || nj > 4)
293 /* Check if we are doing QM on this group */
295 if (qm_grpnr != NULL)
297 for (j = j0; j < j1 && !qm; j++)
299 qm = (qm_grpnr[j] < qm_grps->nr - 1);
302 /* Cannot use solvent optimization with QM */
308 atom = molt->atoms.atom;
310 /* Still looks like a solvent, time to check parameters */
312 /* If it is perturbed (free energy) we can't use the solvent loops,
313 * so then we just skip to the next molecule.
317 for (j = j0; j < j1 && !perturbed; j++)
319 perturbed = PERTURBED(atom[j]);
327 /* Now it's only a question if the VdW and charge parameters
328 * are OK. Before doing the check we compare and see if they are
329 * identical to a possible previous solvent type.
330 * First we assign the current types and charges.
332 for (j = 0; j < nj; j++)
334 tmp_vdwtype[j] = atom[j0+j].type;
335 tmp_charge[j] = atom[j0+j].q;
338 /* Does it match any previous solvent type? */
339 for (k = 0; k < *n_solvent_parameters; k++)
344 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
345 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
346 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
351 /* Check that types & charges match for all atoms in molecule */
352 for (j = 0; j < nj && match == TRUE; j++)
354 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
358 if (tmp_charge[j] != solvent_parameters[k].charge[j])
365 /* Congratulations! We have a matched solvent.
366 * Flag it with this type for later processing.
369 solvent_parameters[k].count += nmol;
371 /* We are done with this charge group */
376 /* If we get here, we have a tentative new solvent type.
377 * Before we add it we must check that it fulfills the requirements
378 * of the solvent optimized loops. First determine which atoms have
381 for (j = 0; j < nj; j++)
384 tjA = tmp_vdwtype[j];
386 /* Go through all other tpes and see if any have non-zero
387 * VdW parameters when combined with this one.
389 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
391 /* We already checked that the atoms weren't perturbed,
392 * so we only need to check state A now.
396 has_vdw[j] = (has_vdw[j] ||
397 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
399 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
404 has_vdw[j] = (has_vdw[j] ||
405 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
406 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
411 /* Now we know all we need to make the final check and assignment. */
415 * For this we require thatn all atoms have charge,
416 * the charges on atom 2 & 3 should be the same, and only
417 * atom 1 might have VdW.
419 if (has_vdw[1] == FALSE &&
420 has_vdw[2] == FALSE &&
421 tmp_charge[0] != 0 &&
422 tmp_charge[1] != 0 &&
423 tmp_charge[2] == tmp_charge[1])
425 srenew(solvent_parameters, *n_solvent_parameters+1);
426 solvent_parameters[*n_solvent_parameters].model = esolSPC;
427 solvent_parameters[*n_solvent_parameters].count = nmol;
428 for (k = 0; k < 3; k++)
430 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
431 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
434 *cg_sp = *n_solvent_parameters;
435 (*n_solvent_parameters)++;
440 /* Or could it be a TIP4P?
441 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
442 * Only atom 1 mght have VdW.
444 if (has_vdw[1] == FALSE &&
445 has_vdw[2] == FALSE &&
446 has_vdw[3] == FALSE &&
447 tmp_charge[0] == 0 &&
448 tmp_charge[1] != 0 &&
449 tmp_charge[2] == tmp_charge[1] &&
452 srenew(solvent_parameters, *n_solvent_parameters+1);
453 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
454 solvent_parameters[*n_solvent_parameters].count = nmol;
455 for (k = 0; k < 4; k++)
457 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
458 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
461 *cg_sp = *n_solvent_parameters;
462 (*n_solvent_parameters)++;
466 *solvent_parameters_p = solvent_parameters;
470 check_solvent(FILE * fp,
471 const gmx_mtop_t * mtop,
473 cginfo_mb_t *cginfo_mb)
476 const t_block * mols;
477 const gmx_moltype_t *molt;
478 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
479 int n_solvent_parameters;
480 solvent_parameters_t *solvent_parameters;
486 fprintf(debug, "Going to determine what solvent types we have.\n");
491 n_solvent_parameters = 0;
492 solvent_parameters = NULL;
493 /* Allocate temporary array for solvent type */
494 snew(cg_sp, mtop->nmolblock);
498 for (mb = 0; mb < mtop->nmolblock; mb++)
500 molt = &mtop->moltype[mtop->molblock[mb].type];
502 /* Here we have to loop over all individual molecules
503 * because we need to check for QMMM particles.
505 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
506 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
507 nmol = mtop->molblock[mb].nmol/nmol_ch;
508 for (mol = 0; mol < nmol_ch; mol++)
511 am = mol*cgs->index[cgs->nr];
512 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
514 check_solvent_cg(molt, cg_mol, nmol,
515 mtop->groups.grpnr[egcQMMM] ?
516 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
517 &mtop->groups.grps[egcQMMM],
519 &n_solvent_parameters, &solvent_parameters,
520 cginfo_mb[mb].cginfo[cgm+cg_mol],
521 &cg_sp[mb][cgm+cg_mol]);
524 cg_offset += cgs->nr;
525 at_offset += cgs->index[cgs->nr];
528 /* Puh! We finished going through all charge groups.
529 * Now find the most common solvent model.
532 /* Most common solvent this far */
534 for (i = 0; i < n_solvent_parameters; i++)
537 solvent_parameters[i].count > solvent_parameters[bestsp].count)
545 bestsol = solvent_parameters[bestsp].model;
552 #ifdef DISABLE_WATER_NLIST
557 for (mb = 0; mb < mtop->nmolblock; mb++)
559 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
560 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
561 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
563 if (cg_sp[mb][i] == bestsp)
565 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
570 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
577 if (bestsol != esolNO && fp != NULL)
579 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
581 solvent_parameters[bestsp].count);
584 sfree(solvent_parameters);
585 fr->solvent_opt = bestsol;
589 acNONE = 0, acCONSTRAINT, acSETTLE
592 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
593 t_forcerec *fr, gmx_bool bNoSolvOpt,
594 gmx_bool *bExcl_IntraCGAll_InterCGNone)
597 const t_blocka *excl;
598 const gmx_moltype_t *molt;
599 const gmx_molblock_t *molb;
600 cginfo_mb_t *cginfo_mb;
603 int cg_offset, a_offset, cgm, am;
604 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
608 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
610 ncg_tot = ncg_mtop(mtop);
611 snew(cginfo_mb, mtop->nmolblock);
613 snew(type_VDW, fr->ntype);
614 for (ai = 0; ai < fr->ntype; ai++)
616 type_VDW[ai] = FALSE;
617 for (j = 0; j < fr->ntype; j++)
619 type_VDW[ai] = type_VDW[ai] ||
621 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
622 C12(fr->nbfp, fr->ntype, ai, j) != 0;
626 *bExcl_IntraCGAll_InterCGNone = TRUE;
629 snew(bExcl, excl_nalloc);
632 for (mb = 0; mb < mtop->nmolblock; mb++)
634 molb = &mtop->molblock[mb];
635 molt = &mtop->moltype[molb->type];
639 /* Check if the cginfo is identical for all molecules in this block.
640 * If so, we only need an array of the size of one molecule.
641 * Otherwise we make an array of #mol times #cgs per molecule.
645 for (m = 0; m < molb->nmol; m++)
647 am = m*cgs->index[cgs->nr];
648 for (cg = 0; cg < cgs->nr; cg++)
651 a1 = cgs->index[cg+1];
652 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
653 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
657 if (mtop->groups.grpnr[egcQMMM] != NULL)
659 for (ai = a0; ai < a1; ai++)
661 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
662 mtop->groups.grpnr[egcQMMM][a_offset +ai])
671 cginfo_mb[mb].cg_start = cg_offset;
672 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
673 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
674 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
675 cginfo = cginfo_mb[mb].cginfo;
677 /* Set constraints flags for constrained atoms */
678 snew(a_con, molt->atoms.nr);
679 for (ftype = 0; ftype < F_NRE; ftype++)
681 if (interaction_function[ftype].flags & IF_CONSTRAINT)
686 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
690 for (a = 0; a < nral; a++)
692 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
693 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
699 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
702 am = m*cgs->index[cgs->nr];
703 for (cg = 0; cg < cgs->nr; cg++)
706 a1 = cgs->index[cg+1];
708 /* Store the energy group in cginfo */
709 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
710 SET_CGINFO_GID(cginfo[cgm+cg], gid);
712 /* Check the intra/inter charge group exclusions */
713 if (a1-a0 > excl_nalloc)
715 excl_nalloc = a1 - a0;
716 srenew(bExcl, excl_nalloc);
718 /* bExclIntraAll: all intra cg interactions excluded
719 * bExclInter: any inter cg interactions excluded
721 bExclIntraAll = TRUE;
725 for (ai = a0; ai < a1; ai++)
727 /* Check VDW and electrostatic interactions */
728 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
729 type_VDW[molt->atoms.atom[ai].typeB]);
730 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
731 molt->atoms.atom[ai].qB != 0);
733 /* Clear the exclusion list for atom ai */
734 for (aj = a0; aj < a1; aj++)
736 bExcl[aj-a0] = FALSE;
738 /* Loop over all the exclusions of atom ai */
739 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
742 if (aj < a0 || aj >= a1)
751 /* Check if ai excludes a0 to a1 */
752 for (aj = a0; aj < a1; aj++)
756 bExclIntraAll = FALSE;
763 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
766 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
774 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
778 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
780 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
782 /* The size in cginfo is currently only read with DD */
783 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
787 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
791 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
793 /* Store the charge group size */
794 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
796 if (!bExclIntraAll || bExclInter)
798 *bExcl_IntraCGAll_InterCGNone = FALSE;
805 cg_offset += molb->nmol*cgs->nr;
806 a_offset += molb->nmol*cgs->index[cgs->nr];
810 /* the solvent optimizer is called after the QM is initialized,
811 * because we don't want to have the QM subsystemto become an
815 check_solvent(fplog, mtop, fr, cginfo_mb);
817 if (getenv("GMX_NO_SOLV_OPT"))
821 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
822 "Disabling all solvent optimization\n");
824 fr->solvent_opt = esolNO;
828 fr->solvent_opt = esolNO;
830 if (!fr->solvent_opt)
832 for (mb = 0; mb < mtop->nmolblock; mb++)
834 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
836 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
844 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
849 ncg = cgi_mb[nmb-1].cg_end;
852 for (cg = 0; cg < ncg; cg++)
854 while (cg >= cgi_mb[mb].cg_end)
859 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
865 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
867 /*This now calculates sum for q and c6*/
868 double qsum, q2sum, q, c6sum, c6, sqrc6;
870 const t_atoms *atoms;
875 for (mb = 0; mb < mtop->nmolblock; mb++)
877 nmol = mtop->molblock[mb].nmol;
878 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
879 for (i = 0; i < atoms->nr; i++)
881 q = atoms->atom[i].q;
884 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
886 c6sum += nmol*sqrc6*sqrc6;
890 fr->q2sum[0] = q2sum;
891 fr->c6sum[0] = c6sum/12;
893 if (fr->efep != efepNO)
898 for (mb = 0; mb < mtop->nmolblock; mb++)
900 nmol = mtop->molblock[mb].nmol;
901 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
902 for (i = 0; i < atoms->nr; i++)
904 q = atoms->atom[i].qB;
907 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
909 c6sum += nmol*sqrc6*sqrc6;
912 fr->q2sum[1] = q2sum;
913 fr->c6sum[1] = c6sum/12;
918 fr->qsum[1] = fr->qsum[0];
919 fr->q2sum[1] = fr->q2sum[0];
920 fr->c6sum[1] = fr->c6sum[0];
924 if (fr->efep == efepNO)
926 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
930 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
931 fr->qsum[0], fr->qsum[1]);
936 void update_forcerec(t_forcerec *fr, matrix box)
938 if (fr->eeltype == eelGRF)
940 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
941 fr->rcoulomb, fr->temp, fr->zsquare, box,
942 &fr->kappa, &fr->k_rf, &fr->c_rf);
946 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
948 const t_atoms *atoms, *atoms_tpi;
949 const t_blocka *excl;
950 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
951 gmx_int64_t npair, npair_ij, tmpi, tmpj;
952 double csix, ctwelve;
956 real *nbfp_comb = NULL;
962 /* For LJ-PME, we want to correct for the difference between the
963 * actual C6 values and the C6 values used by the LJ-PME based on
964 * combination rules. */
966 if (EVDW_PME(fr->vdwtype))
968 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
969 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
970 for (tpi = 0; tpi < ntp; ++tpi)
972 for (tpj = 0; tpj < ntp; ++tpj)
974 C6(nbfp_comb, ntp, tpi, tpj) =
975 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
976 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
981 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
989 /* Count the types so we avoid natoms^2 operations */
990 snew(typecount, ntp);
991 gmx_mtop_count_atomtypes(mtop, q, typecount);
993 for (tpi = 0; tpi < ntp; tpi++)
995 for (tpj = tpi; tpj < ntp; tpj++)
997 tmpi = typecount[tpi];
998 tmpj = typecount[tpj];
1001 npair_ij = tmpi*tmpj;
1005 npair_ij = tmpi*(tmpi - 1)/2;
1009 /* nbfp now includes the 6.0 derivative prefactor */
1010 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1014 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1015 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1016 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1022 /* Subtract the excluded pairs.
1023 * The main reason for substracting exclusions is that in some cases
1024 * some combinations might never occur and the parameters could have
1025 * any value. These unused values should not influence the dispersion
1028 for (mb = 0; mb < mtop->nmolblock; mb++)
1030 nmol = mtop->molblock[mb].nmol;
1031 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1032 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1033 for (i = 0; (i < atoms->nr); i++)
1037 tpi = atoms->atom[i].type;
1041 tpi = atoms->atom[i].typeB;
1043 j1 = excl->index[i];
1044 j2 = excl->index[i+1];
1045 for (j = j1; j < j2; j++)
1052 tpj = atoms->atom[k].type;
1056 tpj = atoms->atom[k].typeB;
1060 /* nbfp now includes the 6.0 derivative prefactor */
1061 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1065 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1066 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1067 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1077 /* Only correct for the interaction of the test particle
1078 * with the rest of the system.
1081 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1084 for (mb = 0; mb < mtop->nmolblock; mb++)
1086 nmol = mtop->molblock[mb].nmol;
1087 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1088 for (j = 0; j < atoms->nr; j++)
1091 /* Remove the interaction of the test charge group
1094 if (mb == mtop->nmolblock-1)
1098 if (mb == 0 && nmol == 1)
1100 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1105 tpj = atoms->atom[j].type;
1109 tpj = atoms->atom[j].typeB;
1111 for (i = 0; i < fr->n_tpi; i++)
1115 tpi = atoms_tpi->atom[i].type;
1119 tpi = atoms_tpi->atom[i].typeB;
1123 /* nbfp now includes the 6.0 derivative prefactor */
1124 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1128 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1129 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1130 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1137 if (npair - nexcl <= 0 && fplog)
1139 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1145 csix /= npair - nexcl;
1146 ctwelve /= npair - nexcl;
1150 fprintf(debug, "Counted %d exclusions\n", nexcl);
1151 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1152 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1154 fr->avcsix[q] = csix;
1155 fr->avctwelve[q] = ctwelve;
1158 if (EVDW_PME(fr->vdwtype))
1165 if (fr->eDispCorr == edispcAllEner ||
1166 fr->eDispCorr == edispcAllEnerPres)
1168 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1169 fr->avcsix[0], fr->avctwelve[0]);
1173 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1179 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1180 const gmx_mtop_t *mtop)
1182 const t_atoms *at1, *at2;
1183 int mt1, mt2, i, j, tpi, tpj, ntypes;
1189 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1196 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1198 at1 = &mtop->moltype[mt1].atoms;
1199 for (i = 0; (i < at1->nr); i++)
1201 tpi = at1->atom[i].type;
1204 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1207 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1209 at2 = &mtop->moltype[mt2].atoms;
1210 for (j = 0; (j < at2->nr); j++)
1212 tpj = at2->atom[j].type;
1215 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1217 b = BHAMB(nbfp, ntypes, tpi, tpj);
1218 if (b > fr->bham_b_max)
1222 if ((b < bmin) || (bmin == -1))
1232 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1233 bmin, fr->bham_b_max);
1237 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1238 t_forcerec *fr, real rtab,
1239 const t_commrec *cr,
1240 const char *tabfn, char *eg1, char *eg2,
1250 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1255 sprintf(buf, "%s", tabfn);
1258 /* Append the two energy group names */
1259 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1260 eg1, eg2, ftp2ext(efXVG));
1262 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1263 /* Copy the contents of the table to separate coulomb and LJ tables too,
1264 * to improve cache performance.
1266 /* For performance reasons we want
1267 * the table data to be aligned to 16-byte. The pointers could be freed
1268 * but currently aren't.
1270 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1271 nbl->table_elec.format = nbl->table_elec_vdw.format;
1272 nbl->table_elec.r = nbl->table_elec_vdw.r;
1273 nbl->table_elec.n = nbl->table_elec_vdw.n;
1274 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1275 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1276 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1277 nbl->table_elec.ninteractions = 1;
1278 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1279 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1281 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1282 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1283 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1284 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1285 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1286 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1287 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1288 nbl->table_vdw.ninteractions = 2;
1289 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1290 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1292 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1294 for (j = 0; j < 4; j++)
1296 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1298 for (j = 0; j < 8; j++)
1300 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1305 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1306 int *ncount, int **count)
1308 const gmx_moltype_t *molt;
1310 int mt, ftype, stride, i, j, tabnr;
1312 for (mt = 0; mt < mtop->nmoltype; mt++)
1314 molt = &mtop->moltype[mt];
1315 for (ftype = 0; ftype < F_NRE; ftype++)
1317 if (ftype == ftype1 || ftype == ftype2)
1319 il = &molt->ilist[ftype];
1320 stride = 1 + NRAL(ftype);
1321 for (i = 0; i < il->nr; i += stride)
1323 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1326 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1328 if (tabnr >= *ncount)
1330 srenew(*count, tabnr+1);
1331 for (j = *ncount; j < tabnr+1; j++)
1344 static bondedtable_t *make_bonded_tables(FILE *fplog,
1345 int ftype1, int ftype2,
1346 const gmx_mtop_t *mtop,
1347 const char *basefn, const char *tabext)
1349 int i, ncount, *count;
1357 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1362 for (i = 0; i < ncount; i++)
1366 sprintf(tabfn, "%s", basefn);
1367 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1368 tabext, i, ftp2ext(efXVG));
1369 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1378 void forcerec_set_ranges(t_forcerec *fr,
1379 int ncg_home, int ncg_force,
1381 int natoms_force_constr, int natoms_f_novirsum)
1386 /* fr->ncg_force is unused in the standard code,
1387 * but it can be useful for modified code dealing with charge groups.
1389 fr->ncg_force = ncg_force;
1390 fr->natoms_force = natoms_force;
1391 fr->natoms_force_constr = natoms_force_constr;
1393 if (fr->natoms_force_constr > fr->nalloc_force)
1395 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1399 srenew(fr->f_twin, fr->nalloc_force);
1403 if (fr->bF_NoVirSum)
1405 fr->f_novirsum_n = natoms_f_novirsum;
1406 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1408 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1409 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1414 fr->f_novirsum_n = 0;
1418 static real cutoff_inf(real cutoff)
1422 cutoff = GMX_CUTOFF_INF;
1428 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1429 t_forcerec *fr, const t_inputrec *ir,
1430 const char *tabfn, const gmx_mtop_t *mtop,
1438 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1442 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1444 sprintf(buf, "%s", tabfn);
1445 for (i = 0; i < ir->adress->n_tf_grps; i++)
1447 j = ir->adress->tf_table_index[i]; /* get energy group index */
1448 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1449 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1452 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1454 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1459 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1466 ir->rcoulomb == 0 &&
1468 ir->ePBC == epbcNONE &&
1469 ir->vdwtype == evdwCUT &&
1470 ir->coulombtype == eelCUT &&
1471 ir->efep == efepNO &&
1472 (ir->implicit_solvent == eisNO ||
1473 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1474 ir->gb_algorithm == egbHCT ||
1475 ir->gb_algorithm == egbOBC))) &&
1476 getenv("GMX_NO_ALLVSALL") == NULL
1479 if (bAllvsAll && ir->opts.ngener > 1)
1481 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1487 fprintf(stderr, "\n%s\n", note);
1491 fprintf(fp, "\n%s\n", note);
1497 if (bAllvsAll && fp && MASTER(cr))
1499 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1506 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1510 /* These thread local data structures are used for bondeds only */
1511 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1513 if (fr->nthreads > 1)
1515 snew(fr->f_t, fr->nthreads);
1516 /* Thread 0 uses the global force and energy arrays */
1517 for (t = 1; t < fr->nthreads; t++)
1519 fr->f_t[t].f = NULL;
1520 fr->f_t[t].f_nalloc = 0;
1521 snew(fr->f_t[t].fshift, SHIFTS);
1522 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1523 for (i = 0; i < egNR; i++)
1525 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1532 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1536 *kernel_type = nbnxnk4x4_PlainC;
1537 *ewald_excl = ewaldexclTable;
1539 #ifdef GMX_NBNXN_SIMD
1541 #ifdef GMX_NBNXN_SIMD_4XN
1542 *kernel_type = nbnxnk4xN_SIMD_4xN;
1544 #ifdef GMX_NBNXN_SIMD_2XNN
1545 /* We expect the 2xNN kernels to be faster in most cases */
1546 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1549 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_SIMD_X86_AVX_256_OR_HIGHER
1550 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1552 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1553 * 10% with HT, 50% without HT, but extra zeros interactions
1554 * can compensate. As we currently don't detect the actual use
1555 * of HT, switch to 4x8 to avoid a potential performance hit.
1557 *kernel_type = nbnxnk4xN_SIMD_4xN;
1560 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1562 #ifdef GMX_NBNXN_SIMD_4XN
1563 *kernel_type = nbnxnk4xN_SIMD_4xN;
1565 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1568 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1570 #ifdef GMX_NBNXN_SIMD_2XNN
1571 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1573 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1577 /* Analytical Ewald exclusion correction is only an option in
1578 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1579 * of precision. In single precision, this is faster on
1580 * Bulldozer, and slightly faster on Sandy Bridge.
1582 #if ((defined GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER || defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __MIC__) && !defined GMX_DOUBLE) || (defined GMX_SIMD_IBM_QPX)
1583 *ewald_excl = ewaldexclAnalytical;
1585 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1587 *ewald_excl = ewaldexclTable;
1589 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1591 *ewald_excl = ewaldexclAnalytical;
1595 #endif /* GMX_NBNXN_SIMD */
1599 const char *lookup_nbnxn_kernel_name(int kernel_type)
1601 const char *returnvalue = NULL;
1602 switch (kernel_type)
1605 returnvalue = "not set";
1607 case nbnxnk4x4_PlainC:
1608 returnvalue = "plain C";
1610 case nbnxnk4xN_SIMD_4xN:
1611 case nbnxnk4xN_SIMD_2xNN:
1612 #ifdef GMX_NBNXN_SIMD
1613 #ifdef GMX_SIMD_X86_SSE2_OR_HIGHER
1614 /* We have x86 SSE2 compatible SIMD */
1615 #ifdef GMX_SIMD_X86_AVX_128_FMA_OR_HIGHER
1616 returnvalue = "AVX-128-FMA";
1618 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER || defined __AVX__
1619 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1620 * on compiler flags. As we use nearly identical intrinsics,
1621 * compiling for AVX without an AVX macros effectively results
1623 * For gcc we check for __AVX__
1624 * At least a check for icc should be added (if there is a macro)
1626 #if defined GMX_SIMD_X86_AVX_256_OR_HIGHER && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1627 returnvalue = "AVX-256";
1629 returnvalue = "AVX-128";
1632 #ifdef GMX_SIMD_X86_SSE4_1_OR_HIGHER
1633 returnvalue = "SSE4.1";
1635 returnvalue = "SSE2";
1639 #else /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1640 /* not GMX_SIMD_X86_SSE2_OR_HIGHER, but other SIMD */
1641 returnvalue = "SIMD";
1642 #endif /* GMX_SIMD_X86_SSE2_OR_HIGHER */
1643 #else /* GMX_NBNXN_SIMD */
1644 returnvalue = "not available";
1645 #endif /* GMX_NBNXN_SIMD */
1647 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1648 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1652 gmx_fatal(FARGS, "Illegal kernel type selected");
1659 static void pick_nbnxn_kernel(FILE *fp,
1660 const t_commrec *cr,
1661 gmx_bool use_simd_kernels,
1663 gmx_bool bEmulateGPU,
1664 const t_inputrec *ir,
1667 gmx_bool bDoNonbonded)
1669 assert(kernel_type);
1671 *kernel_type = nbnxnkNotSet;
1672 *ewald_excl = ewaldexclTable;
1676 *kernel_type = nbnxnk8x8x8_PlainC;
1680 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1685 *kernel_type = nbnxnk8x8x8_CUDA;
1688 if (*kernel_type == nbnxnkNotSet)
1690 if (use_simd_kernels)
1692 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1696 *kernel_type = nbnxnk4x4_PlainC;
1700 if (bDoNonbonded && fp != NULL)
1702 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1703 lookup_nbnxn_kernel_name(*kernel_type),
1704 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1705 nbnxn_kernel_to_cj_size(*kernel_type));
1709 static void pick_nbnxn_resources(const t_commrec *cr,
1710 const gmx_hw_info_t *hwinfo,
1711 gmx_bool bDoNonbonded,
1713 gmx_bool *bEmulateGPU,
1714 const gmx_gpu_opt_t *gpu_opt)
1716 gmx_bool bEmulateGPUEnvVarSet;
1717 char gpu_err_str[STRLEN];
1721 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1723 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1724 * GPUs (currently) only handle non-bonded calculations, we will
1725 * automatically switch to emulation if non-bonded calculations are
1726 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1727 * way to turn off GPU initialization, data movement, and cleanup.
1729 * GPU emulation can be useful to assess the performance one can expect by
1730 * adding GPU(s) to the machine. The conditional below allows this even
1731 * if mdrun is compiled without GPU acceleration support.
1732 * Note that you should freezing the system as otherwise it will explode.
1734 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1736 gpu_opt->ncuda_dev_use > 0));
1738 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1740 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1742 /* Each PP node will use the intra-node id-th device from the
1743 * list of detected/selected GPUs. */
1744 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1745 &hwinfo->gpu_info, gpu_opt))
1747 /* At this point the init should never fail as we made sure that
1748 * we have all the GPUs we need. If it still does, we'll bail. */
1749 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1751 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1752 cr->rank_pp_intranode),
1756 /* Here we actually turn on hardware GPU acceleration */
1761 gmx_bool uses_simple_tables(int cutoff_scheme,
1762 nonbonded_verlet_t *nbv,
1765 gmx_bool bUsesSimpleTables = TRUE;
1768 switch (cutoff_scheme)
1771 bUsesSimpleTables = TRUE;
1774 assert(NULL != nbv && NULL != nbv->grp);
1775 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1776 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1779 gmx_incons("unimplemented");
1781 return bUsesSimpleTables;
1784 static void init_ewald_f_table(interaction_const_t *ic,
1785 gmx_bool bUsesSimpleTables,
1790 if (bUsesSimpleTables)
1792 /* With a spacing of 0.0005 we are at the force summation accuracy
1793 * for the SSE kernels for "normal" atomistic simulations.
1795 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1798 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1799 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1803 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1804 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1805 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1808 sfree_aligned(ic->tabq_coul_FDV0);
1809 sfree_aligned(ic->tabq_coul_F);
1810 sfree_aligned(ic->tabq_coul_V);
1812 /* Create the original table data in FDV0 */
1813 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1814 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1815 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1816 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1817 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1820 void init_interaction_const_tables(FILE *fp,
1821 interaction_const_t *ic,
1822 gmx_bool bUsesSimpleTables,
1827 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1829 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1833 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1834 1/ic->tabq_scale, ic->tabq_size);
1839 static void clear_force_switch_constants(shift_consts_t *sc)
1846 static void force_switch_constants(real p,
1850 /* Here we determine the coefficient for shifting the force to zero
1851 * between distance rsw and the cut-off rc.
1852 * For a potential of r^-p, we have force p*r^-(p+1).
1853 * But to save flops we absorb p in the coefficient.
1855 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1856 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1858 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1859 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1860 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1863 static void potential_switch_constants(real rsw, real rc,
1864 switch_consts_t *sc)
1866 /* The switch function is 1 at rsw and 0 at rc.
1867 * The derivative and second derivate are zero at both ends.
1868 * rsw = max(r - r_switch, 0)
1869 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1870 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1871 * force = force*dsw - potential*sw
1874 sc->c3 = -10*pow(rc - rsw, -3);
1875 sc->c4 = 15*pow(rc - rsw, -4);
1876 sc->c5 = -6*pow(rc - rsw, -5);
1880 init_interaction_const(FILE *fp,
1881 const t_commrec gmx_unused *cr,
1882 interaction_const_t **interaction_const,
1883 const t_forcerec *fr,
1886 interaction_const_t *ic;
1887 gmx_bool bUsesSimpleTables = TRUE;
1891 /* Just allocate something so we can free it */
1892 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1893 snew_aligned(ic->tabq_coul_F, 16, 32);
1894 snew_aligned(ic->tabq_coul_V, 16, 32);
1896 ic->rlist = fr->rlist;
1897 ic->rlistlong = fr->rlistlong;
1900 ic->vdw_modifier = fr->vdw_modifier;
1901 ic->rvdw = fr->rvdw;
1902 ic->rvdw_switch = fr->rvdw_switch;
1903 clear_force_switch_constants(&ic->dispersion_shift);
1904 clear_force_switch_constants(&ic->repulsion_shift);
1906 switch (ic->vdw_modifier)
1908 case eintmodPOTSHIFT:
1909 /* Only shift the potential, don't touch the force */
1910 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1911 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
1913 case eintmodFORCESWITCH:
1914 /* Switch the force, switch and shift the potential */
1915 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1916 &ic->dispersion_shift);
1917 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1918 &ic->repulsion_shift);
1920 case eintmodPOTSWITCH:
1921 /* Switch the potential and force */
1922 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1926 case eintmodEXACTCUTOFF:
1927 /* Nothing to do here */
1930 gmx_incons("unimplemented potential modifier");
1933 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1935 /* Electrostatics */
1936 ic->eeltype = fr->eeltype;
1937 ic->rcoulomb = fr->rcoulomb;
1938 ic->epsilon_r = fr->epsilon_r;
1939 ic->epsfac = fr->epsfac;
1942 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1943 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1945 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1947 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1954 /* Reaction-field */
1955 if (EEL_RF(ic->eeltype))
1957 ic->epsilon_rf = fr->epsilon_rf;
1958 ic->k_rf = fr->k_rf;
1959 ic->c_rf = fr->c_rf;
1963 /* For plain cut-off we might use the reaction-field kernels */
1964 ic->epsilon_rf = ic->epsilon_r;
1966 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1968 ic->c_rf = 1/ic->rcoulomb;
1978 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1979 ic->repulsion_shift.cpot, ic->dispersion_shift.cpot);
1980 if (ic->eeltype == eelCUT)
1982 fprintf(fp, ", Coulomb %.3f", -ic->c_rf);
1984 else if (EEL_PME(ic->eeltype))
1986 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
1991 *interaction_const = ic;
1993 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1995 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1997 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1998 * also sharing texture references. To keep the code simple, we don't
1999 * treat texture references as shared resources, but this means that
2000 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2001 * Hence, to ensure that the non-bonded kernels don't start before all
2002 * texture binding operations are finished, we need to wait for all ranks
2003 * to arrive here before continuing.
2005 * Note that we could omit this barrier if GPUs are not shared (or
2006 * texture objects are used), but as this is initialization code, there
2007 * is not point in complicating things.
2009 #ifdef GMX_THREAD_MPI
2014 #endif /* GMX_THREAD_MPI */
2017 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2018 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2021 static void init_nb_verlet(FILE *fp,
2022 nonbonded_verlet_t **nb_verlet,
2023 const t_inputrec *ir,
2024 const t_forcerec *fr,
2025 const t_commrec *cr,
2026 const char *nbpu_opt)
2028 nonbonded_verlet_t *nbv;
2031 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2033 nbnxn_alloc_t *nb_alloc;
2034 nbnxn_free_t *nb_free;
2038 pick_nbnxn_resources(cr, fr->hwinfo,
2046 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2047 for (i = 0; i < nbv->ngrp; i++)
2049 nbv->grp[i].nbl_lists.nnbl = 0;
2050 nbv->grp[i].nbat = NULL;
2051 nbv->grp[i].kernel_type = nbnxnkNotSet;
2053 if (i == 0) /* local */
2055 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2056 nbv->bUseGPU, bEmulateGPU, ir,
2057 &nbv->grp[i].kernel_type,
2058 &nbv->grp[i].ewald_excl,
2061 else /* non-local */
2063 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2065 /* Use GPU for local, select a CPU kernel for non-local */
2066 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2068 &nbv->grp[i].kernel_type,
2069 &nbv->grp[i].ewald_excl,
2072 bHybridGPURun = TRUE;
2076 /* Use the same kernel for local and non-local interactions */
2077 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2078 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2085 /* init the NxN GPU data; the last argument tells whether we'll have
2086 * both local and non-local NB calculation on GPU */
2087 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2088 &fr->hwinfo->gpu_info, fr->gpu_opt,
2089 cr->rank_pp_intranode,
2090 (nbv->ngrp > 1) && !bHybridGPURun);
2092 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2096 nbv->min_ci_balanced = strtol(env, &end, 10);
2097 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2099 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2104 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2105 nbv->min_ci_balanced);
2110 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2113 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2114 nbv->min_ci_balanced);
2120 nbv->min_ci_balanced = 0;
2125 nbnxn_init_search(&nbv->nbs,
2126 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2127 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2128 gmx_omp_nthreads_get(emntNonbonded));
2130 for (i = 0; i < nbv->ngrp; i++)
2132 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2134 nb_alloc = &pmalloc;
2143 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2144 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2145 /* 8x8x8 "non-simple" lists are ATM always combined */
2146 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2150 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2152 gmx_bool bSimpleList;
2154 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2156 snew(nbv->grp[i].nbat, 1);
2157 nbnxn_atomdata_init(fp,
2159 nbv->grp[i].kernel_type,
2160 /* SIMD LJ switch kernels don't support LJ combination rules */
2161 bSimpleList && !(fr->vdw_modifier == eintmodFORCESWITCH || fr->vdw_modifier == eintmodPOTSWITCH),
2162 fr->ntype, fr->nbfp,
2164 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2169 nbv->grp[i].nbat = nbv->grp[0].nbat;
2174 void init_forcerec(FILE *fp,
2175 const output_env_t oenv,
2178 const t_inputrec *ir,
2179 const gmx_mtop_t *mtop,
2180 const t_commrec *cr,
2186 const char *nbpu_opt,
2187 gmx_bool bNoSolvOpt,
2190 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2195 gmx_bool bGenericKernelOnly;
2196 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2198 int *nm_ind, egp_flags;
2200 if (fr->hwinfo == NULL)
2202 /* Detect hardware, gather information.
2203 * In mdrun, hwinfo has already been set before calling init_forcerec.
2204 * Here we ignore GPUs, as tools will not use them anyhow.
2206 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2209 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2210 fr->use_simd_kernels = TRUE;
2212 fr->bDomDec = DOMAINDECOMP(cr);
2214 natoms = mtop->natoms;
2216 if (check_box(ir->ePBC, box))
2218 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2221 /* Test particle insertion ? */
2224 /* Set to the size of the molecule to be inserted (the last one) */
2225 /* Because of old style topologies, we have to use the last cg
2226 * instead of the last molecule type.
2228 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2229 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2230 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2232 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2240 /* Copy AdResS parameters */
2243 fr->adress_type = ir->adress->type;
2244 fr->adress_const_wf = ir->adress->const_wf;
2245 fr->adress_ex_width = ir->adress->ex_width;
2246 fr->adress_hy_width = ir->adress->hy_width;
2247 fr->adress_icor = ir->adress->icor;
2248 fr->adress_site = ir->adress->site;
2249 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2250 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2253 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2254 for (i = 0; i < ir->adress->n_energy_grps; i++)
2256 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2259 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2260 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2261 for (i = 0; i < fr->n_adress_tf_grps; i++)
2263 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2265 copy_rvec(ir->adress->refs, fr->adress_refs);
2269 fr->adress_type = eAdressOff;
2270 fr->adress_do_hybridpairs = FALSE;
2273 /* Copy the user determined parameters */
2274 fr->userint1 = ir->userint1;
2275 fr->userint2 = ir->userint2;
2276 fr->userint3 = ir->userint3;
2277 fr->userint4 = ir->userint4;
2278 fr->userreal1 = ir->userreal1;
2279 fr->userreal2 = ir->userreal2;
2280 fr->userreal3 = ir->userreal3;
2281 fr->userreal4 = ir->userreal4;
2284 fr->fc_stepsize = ir->fc_stepsize;
2287 fr->efep = ir->efep;
2288 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2289 if (ir->fepvals->bScCoul)
2291 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2292 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2296 fr->sc_alphacoul = 0;
2297 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2299 fr->sc_power = ir->fepvals->sc_power;
2300 fr->sc_r_power = ir->fepvals->sc_r_power;
2301 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2303 env = getenv("GMX_SCSIGMA_MIN");
2307 sscanf(env, "%lf", &dbl);
2308 fr->sc_sigma6_min = pow(dbl, 6);
2311 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2315 fr->bNonbonded = TRUE;
2316 if (getenv("GMX_NO_NONBONDED") != NULL)
2318 /* turn off non-bonded calculations */
2319 fr->bNonbonded = FALSE;
2320 md_print_warn(cr, fp,
2321 "Found environment variable GMX_NO_NONBONDED.\n"
2322 "Disabling nonbonded calculations.\n");
2325 bGenericKernelOnly = FALSE;
2327 /* We now check in the NS code whether a particular combination of interactions
2328 * can be used with water optimization, and disable it if that is not the case.
2331 if (getenv("GMX_NB_GENERIC") != NULL)
2336 "Found environment variable GMX_NB_GENERIC.\n"
2337 "Disabling all interaction-specific nonbonded kernels, will only\n"
2338 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2340 bGenericKernelOnly = TRUE;
2343 if (bGenericKernelOnly == TRUE)
2348 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2350 fr->use_simd_kernels = FALSE;
2354 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2355 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2359 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2361 /* Check if we can/should do all-vs-all kernels */
2362 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2363 fr->AllvsAll_work = NULL;
2364 fr->AllvsAll_workgb = NULL;
2366 /* All-vs-all kernels have not been implemented in 4.6, and
2367 * the SIMD group kernels are also buggy in this case. Non-SIMD
2368 * group kernels are OK. See Redmine #1249. */
2371 fr->bAllvsAll = FALSE;
2372 fr->use_simd_kernels = FALSE;
2376 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2377 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2378 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2379 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2380 "we are proceeding by disabling all CPU architecture-specific\n"
2381 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2382 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2386 /* Neighbour searching stuff */
2387 fr->cutoff_scheme = ir->cutoff_scheme;
2388 fr->bGrid = (ir->ns_type == ensGRID);
2389 fr->ePBC = ir->ePBC;
2391 /* Determine if we will do PBC for distances in bonded interactions */
2392 if (fr->ePBC == epbcNONE)
2394 fr->bMolPBC = FALSE;
2398 if (!DOMAINDECOMP(cr))
2400 /* The group cut-off scheme and SHAKE assume charge groups
2401 * are whole, but not using molpbc is faster in most cases.
2403 if (fr->cutoff_scheme == ecutsGROUP ||
2404 (ir->eConstrAlg == econtSHAKE &&
2405 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2406 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2408 fr->bMolPBC = ir->bPeriodicMols;
2413 if (getenv("GMX_USE_GRAPH") != NULL)
2415 fr->bMolPBC = FALSE;
2418 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2425 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2428 fr->bGB = (ir->implicit_solvent == eisGBSA);
2430 fr->rc_scaling = ir->refcoord_scaling;
2431 copy_rvec(ir->posres_com, fr->posres_com);
2432 copy_rvec(ir->posres_comB, fr->posres_comB);
2433 fr->rlist = cutoff_inf(ir->rlist);
2434 fr->rlistlong = cutoff_inf(ir->rlistlong);
2435 fr->eeltype = ir->coulombtype;
2436 fr->vdwtype = ir->vdwtype;
2437 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2439 fr->coulomb_modifier = ir->coulomb_modifier;
2440 fr->vdw_modifier = ir->vdw_modifier;
2442 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2443 switch (fr->eeltype)
2446 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2452 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2456 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2457 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2466 case eelPMEUSERSWITCH:
2467 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2472 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2476 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2480 /* Vdw: Translate from mdp settings to kernel format */
2481 switch (fr->vdwtype)
2487 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2491 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2498 case evdwENCADSHIFT:
2499 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2503 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2507 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2508 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2509 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2511 fr->bTwinRange = fr->rlistlong > fr->rlist;
2512 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2514 fr->reppow = mtop->ffparams.reppow;
2516 if (ir->cutoff_scheme == ecutsGROUP)
2518 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2519 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2520 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2521 fr->bcoultab = !(fr->eeltype == eelCUT ||
2522 fr->eeltype == eelEWALD ||
2523 fr->eeltype == eelPME ||
2524 fr->eeltype == eelRF ||
2525 fr->eeltype == eelRF_ZERO);
2527 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2528 * going to be faster to tabulate the interaction than calling the generic kernel.
2530 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2532 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2534 fr->bcoultab = TRUE;
2537 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2538 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2539 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2540 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2542 if (fr->rcoulomb != fr->rvdw)
2544 fr->bcoultab = TRUE;
2548 if (getenv("GMX_REQUIRE_TABLES"))
2551 fr->bcoultab = TRUE;
2556 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2557 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2560 if (fr->bvdwtab == TRUE)
2562 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2563 fr->nbkernel_vdw_modifier = eintmodNONE;
2565 if (fr->bcoultab == TRUE)
2567 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2568 fr->nbkernel_elec_modifier = eintmodNONE;
2572 if (ir->cutoff_scheme == ecutsVERLET)
2574 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2576 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2578 fr->bvdwtab = FALSE;
2579 fr->bcoultab = FALSE;
2582 /* Tables are used for direct ewald sum */
2585 if (EEL_PME(ir->coulombtype))
2589 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2591 if (ir->coulombtype == eelP3M_AD)
2593 please_cite(fp, "Hockney1988");
2594 please_cite(fp, "Ballenegger2012");
2598 please_cite(fp, "Essmann95a");
2601 if (ir->ewald_geometry == eewg3DC)
2605 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2607 please_cite(fp, "In-Chul99a");
2610 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2611 init_ewald_tab(&(fr->ewald_table), ir, fp);
2614 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2615 1/fr->ewaldcoeff_q);
2619 if (EVDW_PME(ir->vdwtype))
2623 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2625 please_cite(fp, "Essman95a");
2626 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2629 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2630 1/fr->ewaldcoeff_lj);
2634 /* Electrostatics */
2635 fr->epsilon_r = ir->epsilon_r;
2636 fr->epsilon_rf = ir->epsilon_rf;
2637 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2638 fr->rcoulomb_switch = ir->rcoulomb_switch;
2639 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2641 /* Parameters for generalized RF */
2645 if (fr->eeltype == eelGRF)
2647 init_generalized_rf(fp, mtop, ir, fr);
2650 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2651 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2652 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2653 IR_ELEC_FIELD(*ir) ||
2654 (fr->adress_icor != eAdressICOff)
2657 if (fr->cutoff_scheme == ecutsGROUP &&
2658 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2660 /* Count the total number of charge groups */
2661 fr->cg_nalloc = ncg_mtop(mtop);
2662 srenew(fr->cg_cm, fr->cg_nalloc);
2664 if (fr->shift_vec == NULL)
2666 snew(fr->shift_vec, SHIFTS);
2669 if (fr->fshift == NULL)
2671 snew(fr->fshift, SHIFTS);
2674 if (fr->nbfp == NULL)
2676 fr->ntype = mtop->ffparams.atnr;
2677 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2680 /* Copy the energy group exclusions */
2681 fr->egp_flags = ir->opts.egp_flags;
2683 /* Van der Waals stuff */
2684 fr->rvdw = cutoff_inf(ir->rvdw);
2685 fr->rvdw_switch = ir->rvdw_switch;
2686 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2688 if (fr->rvdw_switch >= fr->rvdw)
2690 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2691 fr->rvdw_switch, fr->rvdw);
2695 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2696 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2697 fr->rvdw_switch, fr->rvdw);
2701 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2703 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2706 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2708 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2713 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2714 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2717 fr->eDispCorr = ir->eDispCorr;
2718 if (ir->eDispCorr != edispcNO)
2720 set_avcsixtwelve(fp, fr, mtop);
2725 set_bham_b_max(fp, fr, mtop);
2728 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2730 /* Copy the GBSA data (radius, volume and surftens for each
2731 * atomtype) from the topology atomtype section to forcerec.
2733 snew(fr->atype_radius, fr->ntype);
2734 snew(fr->atype_vol, fr->ntype);
2735 snew(fr->atype_surftens, fr->ntype);
2736 snew(fr->atype_gb_radius, fr->ntype);
2737 snew(fr->atype_S_hct, fr->ntype);
2739 if (mtop->atomtypes.nr > 0)
2741 for (i = 0; i < fr->ntype; i++)
2743 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2745 for (i = 0; i < fr->ntype; i++)
2747 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2749 for (i = 0; i < fr->ntype; i++)
2751 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2753 for (i = 0; i < fr->ntype; i++)
2755 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2757 for (i = 0; i < fr->ntype; i++)
2759 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2763 /* Generate the GB table if needed */
2767 fr->gbtabscale = 2000;
2769 fr->gbtabscale = 500;
2773 fr->gbtab = make_gb_table(oenv, fr);
2775 init_gb(&fr->born, cr, fr, ir, mtop, ir->gb_algorithm);
2777 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2778 if (!DOMAINDECOMP(cr))
2780 make_local_gb(cr, fr->born, ir->gb_algorithm);
2784 /* Set the charge scaling */
2785 if (fr->epsilon_r != 0)
2787 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2791 /* eps = 0 is infinite dieletric: no coulomb interactions */
2795 /* Reaction field constants */
2796 if (EEL_RF(fr->eeltype))
2798 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2799 fr->rcoulomb, fr->temp, fr->zsquare, box,
2800 &fr->kappa, &fr->k_rf, &fr->c_rf);
2803 /*This now calculates sum for q and c6*/
2804 set_chargesum(fp, fr, mtop);
2806 /* if we are using LR electrostatics, and they are tabulated,
2807 * the tables will contain modified coulomb interactions.
2808 * Since we want to use the non-shifted ones for 1-4
2809 * coulombic interactions, we must have an extra set of tables.
2812 /* Construct tables.
2813 * A little unnecessary to make both vdw and coul tables sometimes,
2814 * but what the heck... */
2816 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2817 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2819 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2820 fr->bBHAM || fr->bEwald) &&
2821 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2822 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2823 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2825 negp_pp = ir->opts.ngener - ir->nwall;
2829 bSomeNormalNbListsAreInUse = TRUE;
2834 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2835 for (egi = 0; egi < negp_pp; egi++)
2837 for (egj = egi; egj < negp_pp; egj++)
2839 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2840 if (!(egp_flags & EGP_EXCL))
2842 if (egp_flags & EGP_TABLE)
2848 bSomeNormalNbListsAreInUse = TRUE;
2853 if (bSomeNormalNbListsAreInUse)
2855 fr->nnblists = negptable + 1;
2859 fr->nnblists = negptable;
2861 if (fr->nnblists > 1)
2863 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2872 snew(fr->nblists, fr->nnblists);
2874 /* This code automatically gives table length tabext without cut-off's,
2875 * in that case grompp should already have checked that we do not need
2876 * normal tables and we only generate tables for 1-4 interactions.
2878 rtab = ir->rlistlong + ir->tabext;
2882 /* make tables for ordinary interactions */
2883 if (bSomeNormalNbListsAreInUse)
2885 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2888 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2890 if (!bMakeSeparate14Table)
2892 fr->tab14 = fr->nblists[0].table_elec_vdw;
2902 /* Read the special tables for certain energy group pairs */
2903 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2904 for (egi = 0; egi < negp_pp; egi++)
2906 for (egj = egi; egj < negp_pp; egj++)
2908 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2909 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2911 nbl = &(fr->nblists[m]);
2912 if (fr->nnblists > 1)
2914 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2916 /* Read the table file with the two energy groups names appended */
2917 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2918 *mtop->groups.grpname[nm_ind[egi]],
2919 *mtop->groups.grpname[nm_ind[egj]],
2923 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2924 *mtop->groups.grpname[nm_ind[egi]],
2925 *mtop->groups.grpname[nm_ind[egj]],
2926 &fr->nblists[fr->nnblists/2+m]);
2930 else if (fr->nnblists > 1)
2932 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2938 if (bMakeSeparate14Table)
2940 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2941 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2942 GMX_MAKETABLES_14ONLY);
2945 /* Read AdResS Thermo Force table if needed */
2946 if (fr->adress_icor == eAdressICThermoForce)
2948 /* old todo replace */
2950 if (ir->adress->n_tf_grps > 0)
2952 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2957 /* load the default table */
2958 snew(fr->atf_tabs, 1);
2959 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2964 fr->nwall = ir->nwall;
2965 if (ir->nwall && ir->wall_type == ewtTABLE)
2967 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2972 fcd->bondtab = make_bonded_tables(fp,
2973 F_TABBONDS, F_TABBONDSNC,
2975 fcd->angletab = make_bonded_tables(fp,
2978 fcd->dihtab = make_bonded_tables(fp,
2986 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2990 /* QM/MM initialization if requested
2994 fprintf(stderr, "QM/MM calculation requested.\n");
2997 fr->bQMMM = ir->bQMMM;
2998 fr->qr = mk_QMMMrec();
3000 /* Set all the static charge group info */
3001 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3002 &fr->bExcl_IntraCGAll_InterCGNone);
3003 if (DOMAINDECOMP(cr))
3009 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3012 if (!DOMAINDECOMP(cr))
3014 /* When using particle decomposition, the effect of the second argument,
3015 * which sets fr->hcg, is corrected later in do_md and init_em.
3017 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3018 mtop->natoms, mtop->natoms, mtop->natoms);
3021 fr->print_force = print_force;
3024 /* coarse load balancing vars */
3029 /* Initialize neighbor search */
3030 init_ns(fp, cr, &fr->ns, fr, mtop);
3032 if (cr->duty & DUTY_PP)
3034 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3038 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3043 /* Initialize the thread working data for bonded interactions */
3044 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3046 snew(fr->excl_load, fr->nthreads+1);
3048 if (fr->cutoff_scheme == ecutsVERLET)
3050 if (ir->rcoulomb != ir->rvdw)
3052 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3055 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
3058 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3059 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3061 if (ir->eDispCorr != edispcNO)
3063 calc_enervirdiff(fp, ir->eDispCorr, fr);
3067 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3068 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3069 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3071 void pr_forcerec(FILE *fp, t_forcerec *fr)
3075 pr_real(fp, fr->rlist);
3076 pr_real(fp, fr->rcoulomb);
3077 pr_real(fp, fr->fudgeQQ);
3078 pr_bool(fp, fr->bGrid);
3079 pr_bool(fp, fr->bTwinRange);
3080 /*pr_int(fp,fr->cg0);
3081 pr_int(fp,fr->hcg);*/
3082 for (i = 0; i < fr->nnblists; i++)
3084 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3086 pr_real(fp, fr->rcoulomb_switch);
3087 pr_real(fp, fr->rcoulomb);
3092 void forcerec_set_excl_load(t_forcerec *fr,
3093 const gmx_localtop_t *top, const t_commrec *cr)
3096 int t, i, j, ntot, n, ntarget;
3098 if (cr != NULL && PARTDECOMP(cr))
3100 /* No OpenMP with particle decomposition */
3108 ind = top->excls.index;
3112 for (i = 0; i < top->excls.nr; i++)
3114 for (j = ind[i]; j < ind[i+1]; j++)
3123 fr->excl_load[0] = 0;
3126 for (t = 1; t <= fr->nthreads; t++)
3128 ntarget = (ntot*t)/fr->nthreads;
3129 while (i < top->excls.nr && n < ntarget)
3131 for (j = ind[i]; j < ind[i+1]; j++)
3140 fr->excl_load[t] = i;