2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gmx_fatal.h"
52 #include "gmx_fatal_collective.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
71 #include "mtop_util.h"
72 #include "nbnxn_search.h"
73 #include "nbnxn_atomdata.h"
74 #include "nbnxn_consts.h"
76 #include "gmx_omp_nthreads.h"
77 #include "gmx_detect_hardware.h"
80 /* MSVC definition for __cpuid() */
84 #include "types/nbnxn_cuda_types_ext.h"
85 #include "gpu_utils.h"
86 #include "nbnxn_cuda_data_mgmt.h"
87 #include "pmalloc_cuda.h"
89 t_forcerec *mk_forcerec(void)
99 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
103 for (i = 0; (i < atnr); i++)
105 for (j = 0; (j < atnr); j++)
107 fprintf(fp, "%2d - %2d", i, j);
110 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
111 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
115 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
116 C12(nbfp, atnr, i, j)/12.0);
123 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
131 snew(nbfp, 3*atnr*atnr);
132 for (i = k = 0; (i < atnr); i++)
134 for (j = 0; (j < atnr); j++, k++)
136 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
137 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
138 /* nbfp now includes the 6.0 derivative prefactor */
139 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
145 snew(nbfp, 2*atnr*atnr);
146 for (i = k = 0; (i < atnr); i++)
148 for (j = 0; (j < atnr); j++, k++)
150 /* nbfp now includes the 6.0/12.0 derivative prefactors */
151 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
152 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
160 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
164 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
168 snew(nbfp, 2*atnr*atnr);
169 for (i = 0; i < atnr; ++i)
171 for (j = 0; j < atnr; ++j)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 c12 = sqrt(c12i * c12j);
179 if (comb_rule == eCOMB_ARITHMETIC
180 && !gmx_numzero(c6) && !gmx_numzero(c12))
182 sigmai = pow(c12i / c6i, 1.0/6.0);
183 sigmaj = pow(c12j / c6j, 1.0/6.0);
184 epsi = c6i * c6i / c12i;
185 epsj = c6j * c6j / c12j;
186 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
187 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
189 C6(nbfp, atnr, i, j) = c6*6.0;
190 C12(nbfp, atnr, i, j) = c12*12.0;
196 /* This routine sets fr->solvent_opt to the most common solvent in the
197 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
198 * the fr->solvent_type array with the correct type (or esolNO).
200 * Charge groups that fulfill the conditions but are not identical to the
201 * most common one will be marked as esolNO in the solvent_type array.
203 * TIP3p is identical to SPC for these purposes, so we call it
204 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
206 * NOTE: QM particle should not
207 * become an optimized solvent. Not even if there is only one charge
217 } solvent_parameters_t;
220 check_solvent_cg(const gmx_moltype_t *molt,
223 const unsigned char *qm_grpnr,
224 const t_grps *qm_grps,
226 int *n_solvent_parameters,
227 solvent_parameters_t **solvent_parameters_p,
231 const t_blocka * excl;
242 solvent_parameters_t *solvent_parameters;
244 /* We use a list with parameters for each solvent type.
245 * Every time we discover a new molecule that fulfills the basic
246 * conditions for a solvent we compare with the previous entries
247 * in these lists. If the parameters are the same we just increment
248 * the counter for that type, and otherwise we create a new type
249 * based on the current molecule.
251 * Once we've finished going through all molecules we check which
252 * solvent is most common, and mark all those molecules while we
253 * clear the flag on all others.
256 solvent_parameters = *solvent_parameters_p;
258 /* Mark the cg first as non optimized */
261 /* Check if this cg has no exclusions with atoms in other charge groups
262 * and all atoms inside the charge group excluded.
263 * We only have 3 or 4 atom solvent loops.
265 if (GET_CGINFO_EXCL_INTER(cginfo) ||
266 !GET_CGINFO_EXCL_INTRA(cginfo))
271 /* Get the indices of the first atom in this charge group */
272 j0 = molt->cgs.index[cg0];
273 j1 = molt->cgs.index[cg0+1];
275 /* Number of atoms in our molecule */
281 "Moltype '%s': there are %d atoms in this charge group\n",
285 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
288 if (nj < 3 || nj > 4)
293 /* Check if we are doing QM on this group */
295 if (qm_grpnr != NULL)
297 for (j = j0; j < j1 && !qm; j++)
299 qm = (qm_grpnr[j] < qm_grps->nr - 1);
302 /* Cannot use solvent optimization with QM */
308 atom = molt->atoms.atom;
310 /* Still looks like a solvent, time to check parameters */
312 /* If it is perturbed (free energy) we can't use the solvent loops,
313 * so then we just skip to the next molecule.
317 for (j = j0; j < j1 && !perturbed; j++)
319 perturbed = PERTURBED(atom[j]);
327 /* Now it's only a question if the VdW and charge parameters
328 * are OK. Before doing the check we compare and see if they are
329 * identical to a possible previous solvent type.
330 * First we assign the current types and charges.
332 for (j = 0; j < nj; j++)
334 tmp_vdwtype[j] = atom[j0+j].type;
335 tmp_charge[j] = atom[j0+j].q;
338 /* Does it match any previous solvent type? */
339 for (k = 0; k < *n_solvent_parameters; k++)
344 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
345 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
346 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
351 /* Check that types & charges match for all atoms in molecule */
352 for (j = 0; j < nj && match == TRUE; j++)
354 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
358 if (tmp_charge[j] != solvent_parameters[k].charge[j])
365 /* Congratulations! We have a matched solvent.
366 * Flag it with this type for later processing.
369 solvent_parameters[k].count += nmol;
371 /* We are done with this charge group */
376 /* If we get here, we have a tentative new solvent type.
377 * Before we add it we must check that it fulfills the requirements
378 * of the solvent optimized loops. First determine which atoms have
381 for (j = 0; j < nj; j++)
384 tjA = tmp_vdwtype[j];
386 /* Go through all other tpes and see if any have non-zero
387 * VdW parameters when combined with this one.
389 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
391 /* We already checked that the atoms weren't perturbed,
392 * so we only need to check state A now.
396 has_vdw[j] = (has_vdw[j] ||
397 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
398 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
399 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
404 has_vdw[j] = (has_vdw[j] ||
405 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
406 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
411 /* Now we know all we need to make the final check and assignment. */
415 * For this we require thatn all atoms have charge,
416 * the charges on atom 2 & 3 should be the same, and only
417 * atom 1 might have VdW.
419 if (has_vdw[1] == FALSE &&
420 has_vdw[2] == FALSE &&
421 tmp_charge[0] != 0 &&
422 tmp_charge[1] != 0 &&
423 tmp_charge[2] == tmp_charge[1])
425 srenew(solvent_parameters, *n_solvent_parameters+1);
426 solvent_parameters[*n_solvent_parameters].model = esolSPC;
427 solvent_parameters[*n_solvent_parameters].count = nmol;
428 for (k = 0; k < 3; k++)
430 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
431 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
434 *cg_sp = *n_solvent_parameters;
435 (*n_solvent_parameters)++;
440 /* Or could it be a TIP4P?
441 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
442 * Only atom 1 mght have VdW.
444 if (has_vdw[1] == FALSE &&
445 has_vdw[2] == FALSE &&
446 has_vdw[3] == FALSE &&
447 tmp_charge[0] == 0 &&
448 tmp_charge[1] != 0 &&
449 tmp_charge[2] == tmp_charge[1] &&
452 srenew(solvent_parameters, *n_solvent_parameters+1);
453 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
454 solvent_parameters[*n_solvent_parameters].count = nmol;
455 for (k = 0; k < 4; k++)
457 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
458 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
461 *cg_sp = *n_solvent_parameters;
462 (*n_solvent_parameters)++;
466 *solvent_parameters_p = solvent_parameters;
470 check_solvent(FILE * fp,
471 const gmx_mtop_t * mtop,
473 cginfo_mb_t *cginfo_mb)
476 const t_block * mols;
477 const gmx_moltype_t *molt;
478 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
479 int n_solvent_parameters;
480 solvent_parameters_t *solvent_parameters;
486 fprintf(debug, "Going to determine what solvent types we have.\n");
491 n_solvent_parameters = 0;
492 solvent_parameters = NULL;
493 /* Allocate temporary array for solvent type */
494 snew(cg_sp, mtop->nmolblock);
498 for (mb = 0; mb < mtop->nmolblock; mb++)
500 molt = &mtop->moltype[mtop->molblock[mb].type];
502 /* Here we have to loop over all individual molecules
503 * because we need to check for QMMM particles.
505 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
506 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
507 nmol = mtop->molblock[mb].nmol/nmol_ch;
508 for (mol = 0; mol < nmol_ch; mol++)
511 am = mol*cgs->index[cgs->nr];
512 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
514 check_solvent_cg(molt, cg_mol, nmol,
515 mtop->groups.grpnr[egcQMMM] ?
516 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
517 &mtop->groups.grps[egcQMMM],
519 &n_solvent_parameters, &solvent_parameters,
520 cginfo_mb[mb].cginfo[cgm+cg_mol],
521 &cg_sp[mb][cgm+cg_mol]);
524 cg_offset += cgs->nr;
525 at_offset += cgs->index[cgs->nr];
528 /* Puh! We finished going through all charge groups.
529 * Now find the most common solvent model.
532 /* Most common solvent this far */
534 for (i = 0; i < n_solvent_parameters; i++)
537 solvent_parameters[i].count > solvent_parameters[bestsp].count)
545 bestsol = solvent_parameters[bestsp].model;
552 #ifdef DISABLE_WATER_NLIST
557 for (mb = 0; mb < mtop->nmolblock; mb++)
559 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
560 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
561 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
563 if (cg_sp[mb][i] == bestsp)
565 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
570 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
577 if (bestsol != esolNO && fp != NULL)
579 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
581 solvent_parameters[bestsp].count);
584 sfree(solvent_parameters);
585 fr->solvent_opt = bestsol;
589 acNONE = 0, acCONSTRAINT, acSETTLE
592 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
593 t_forcerec *fr, gmx_bool bNoSolvOpt,
594 gmx_bool *bExcl_IntraCGAll_InterCGNone)
597 const t_blocka *excl;
598 const gmx_moltype_t *molt;
599 const gmx_molblock_t *molb;
600 cginfo_mb_t *cginfo_mb;
603 int cg_offset, a_offset, cgm, am;
604 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
608 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
610 ncg_tot = ncg_mtop(mtop);
611 snew(cginfo_mb, mtop->nmolblock);
613 snew(type_VDW, fr->ntype);
614 for (ai = 0; ai < fr->ntype; ai++)
616 type_VDW[ai] = FALSE;
617 for (j = 0; j < fr->ntype; j++)
619 type_VDW[ai] = type_VDW[ai] ||
621 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
622 C12(fr->nbfp, fr->ntype, ai, j) != 0;
626 *bExcl_IntraCGAll_InterCGNone = TRUE;
629 snew(bExcl, excl_nalloc);
632 for (mb = 0; mb < mtop->nmolblock; mb++)
634 molb = &mtop->molblock[mb];
635 molt = &mtop->moltype[molb->type];
639 /* Check if the cginfo is identical for all molecules in this block.
640 * If so, we only need an array of the size of one molecule.
641 * Otherwise we make an array of #mol times #cgs per molecule.
645 for (m = 0; m < molb->nmol; m++)
647 am = m*cgs->index[cgs->nr];
648 for (cg = 0; cg < cgs->nr; cg++)
651 a1 = cgs->index[cg+1];
652 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
653 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
657 if (mtop->groups.grpnr[egcQMMM] != NULL)
659 for (ai = a0; ai < a1; ai++)
661 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
662 mtop->groups.grpnr[egcQMMM][a_offset +ai])
671 cginfo_mb[mb].cg_start = cg_offset;
672 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
673 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
674 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
675 cginfo = cginfo_mb[mb].cginfo;
677 /* Set constraints flags for constrained atoms */
678 snew(a_con, molt->atoms.nr);
679 for (ftype = 0; ftype < F_NRE; ftype++)
681 if (interaction_function[ftype].flags & IF_CONSTRAINT)
686 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
690 for (a = 0; a < nral; a++)
692 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
693 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
699 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
702 am = m*cgs->index[cgs->nr];
703 for (cg = 0; cg < cgs->nr; cg++)
706 a1 = cgs->index[cg+1];
708 /* Store the energy group in cginfo */
709 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
710 SET_CGINFO_GID(cginfo[cgm+cg], gid);
712 /* Check the intra/inter charge group exclusions */
713 if (a1-a0 > excl_nalloc)
715 excl_nalloc = a1 - a0;
716 srenew(bExcl, excl_nalloc);
718 /* bExclIntraAll: all intra cg interactions excluded
719 * bExclInter: any inter cg interactions excluded
721 bExclIntraAll = TRUE;
725 for (ai = a0; ai < a1; ai++)
727 /* Check VDW and electrostatic interactions */
728 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
729 type_VDW[molt->atoms.atom[ai].typeB]);
730 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
731 molt->atoms.atom[ai].qB != 0);
733 /* Clear the exclusion list for atom ai */
734 for (aj = a0; aj < a1; aj++)
736 bExcl[aj-a0] = FALSE;
738 /* Loop over all the exclusions of atom ai */
739 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
742 if (aj < a0 || aj >= a1)
751 /* Check if ai excludes a0 to a1 */
752 for (aj = a0; aj < a1; aj++)
756 bExclIntraAll = FALSE;
763 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
766 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
774 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
778 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
780 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
782 /* The size in cginfo is currently only read with DD */
783 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
787 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
791 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
793 /* Store the charge group size */
794 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
796 if (!bExclIntraAll || bExclInter)
798 *bExcl_IntraCGAll_InterCGNone = FALSE;
805 cg_offset += molb->nmol*cgs->nr;
806 a_offset += molb->nmol*cgs->index[cgs->nr];
810 /* the solvent optimizer is called after the QM is initialized,
811 * because we don't want to have the QM subsystemto become an
815 check_solvent(fplog, mtop, fr, cginfo_mb);
817 if (getenv("GMX_NO_SOLV_OPT"))
821 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
822 "Disabling all solvent optimization\n");
824 fr->solvent_opt = esolNO;
828 fr->solvent_opt = esolNO;
830 if (!fr->solvent_opt)
832 for (mb = 0; mb < mtop->nmolblock; mb++)
834 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
836 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
844 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
849 ncg = cgi_mb[nmb-1].cg_end;
852 for (cg = 0; cg < ncg; cg++)
854 while (cg >= cgi_mb[mb].cg_end)
859 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
865 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
867 /*This now calculates sum for q and c6*/
868 double qsum, q2sum, q, c6sum, c6, sqrc6;
870 const t_atoms *atoms;
875 for (mb = 0; mb < mtop->nmolblock; mb++)
877 nmol = mtop->molblock[mb].nmol;
878 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
879 for (i = 0; i < atoms->nr; i++)
881 q = atoms->atom[i].q;
884 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
886 c6sum += nmol*sqrc6*sqrc6;
890 fr->q2sum[0] = q2sum;
891 fr->c6sum[0] = c6sum/12;
893 if (fr->efep != efepNO)
898 for (mb = 0; mb < mtop->nmolblock; mb++)
900 nmol = mtop->molblock[mb].nmol;
901 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
902 for (i = 0; i < atoms->nr; i++)
904 q = atoms->atom[i].qB;
907 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
909 c6sum += nmol*sqrc6*sqrc6;
912 fr->q2sum[1] = q2sum;
913 fr->c6sum[1] = c6sum/12;
918 fr->qsum[1] = fr->qsum[0];
919 fr->q2sum[1] = fr->q2sum[0];
920 fr->c6sum[1] = fr->c6sum[0];
924 if (fr->efep == efepNO)
926 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
930 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
931 fr->qsum[0], fr->qsum[1]);
936 void update_forcerec(t_forcerec *fr, matrix box)
938 if (fr->eeltype == eelGRF)
940 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
941 fr->rcoulomb, fr->temp, fr->zsquare, box,
942 &fr->kappa, &fr->k_rf, &fr->c_rf);
946 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
948 const t_atoms *atoms, *atoms_tpi;
949 const t_blocka *excl;
950 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
951 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
952 long long int npair, npair_ij, tmpi, tmpj;
954 double npair, npair_ij, tmpi, tmpj;
956 double csix, ctwelve;
960 real *nbfp_comb = NULL;
966 /* For LJ-PME, we want to correct for the difference between the
967 * actual C6 values and the C6 values used by the LJ-PME based on
968 * combination rules. */
970 if (EVDW_PME(fr->vdwtype))
972 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
973 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
974 for (tpi = 0; tpi < ntp; ++tpi)
976 for (tpj = 0; tpj < ntp; ++tpj)
978 C6(nbfp_comb, ntp, tpi, tpj) =
979 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
980 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
985 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
993 /* Count the types so we avoid natoms^2 operations */
994 snew(typecount, ntp);
995 gmx_mtop_count_atomtypes(mtop, q, typecount);
997 for (tpi = 0; tpi < ntp; tpi++)
999 for (tpj = tpi; tpj < ntp; tpj++)
1001 tmpi = typecount[tpi];
1002 tmpj = typecount[tpj];
1005 npair_ij = tmpi*tmpj;
1009 npair_ij = tmpi*(tmpi - 1)/2;
1013 /* nbfp now includes the 6.0 derivative prefactor */
1014 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1018 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1019 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1020 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1026 /* Subtract the excluded pairs.
1027 * The main reason for substracting exclusions is that in some cases
1028 * some combinations might never occur and the parameters could have
1029 * any value. These unused values should not influence the dispersion
1032 for (mb = 0; mb < mtop->nmolblock; mb++)
1034 nmol = mtop->molblock[mb].nmol;
1035 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1036 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1037 for (i = 0; (i < atoms->nr); i++)
1041 tpi = atoms->atom[i].type;
1045 tpi = atoms->atom[i].typeB;
1047 j1 = excl->index[i];
1048 j2 = excl->index[i+1];
1049 for (j = j1; j < j2; j++)
1056 tpj = atoms->atom[k].type;
1060 tpj = atoms->atom[k].typeB;
1064 /* nbfp now includes the 6.0 derivative prefactor */
1065 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1069 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1070 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1071 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1081 /* Only correct for the interaction of the test particle
1082 * with the rest of the system.
1085 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1088 for (mb = 0; mb < mtop->nmolblock; mb++)
1090 nmol = mtop->molblock[mb].nmol;
1091 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1092 for (j = 0; j < atoms->nr; j++)
1095 /* Remove the interaction of the test charge group
1098 if (mb == mtop->nmolblock-1)
1102 if (mb == 0 && nmol == 1)
1104 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1109 tpj = atoms->atom[j].type;
1113 tpj = atoms->atom[j].typeB;
1115 for (i = 0; i < fr->n_tpi; i++)
1119 tpi = atoms_tpi->atom[i].type;
1123 tpi = atoms_tpi->atom[i].typeB;
1127 /* nbfp now includes the 6.0 derivative prefactor */
1128 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1132 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1133 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1134 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1141 if (npair - nexcl <= 0 && fplog)
1143 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1149 csix /= npair - nexcl;
1150 ctwelve /= npair - nexcl;
1154 fprintf(debug, "Counted %d exclusions\n", nexcl);
1155 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1156 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1158 fr->avcsix[q] = csix;
1159 fr->avctwelve[q] = ctwelve;
1162 if (EVDW_PME(fr->vdwtype))
1169 if (fr->eDispCorr == edispcAllEner ||
1170 fr->eDispCorr == edispcAllEnerPres)
1172 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1173 fr->avcsix[0], fr->avctwelve[0]);
1177 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1183 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1184 const gmx_mtop_t *mtop)
1186 const t_atoms *at1, *at2;
1187 int mt1, mt2, i, j, tpi, tpj, ntypes;
1193 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1200 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1202 at1 = &mtop->moltype[mt1].atoms;
1203 for (i = 0; (i < at1->nr); i++)
1205 tpi = at1->atom[i].type;
1208 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1211 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1213 at2 = &mtop->moltype[mt2].atoms;
1214 for (j = 0; (j < at2->nr); j++)
1216 tpj = at2->atom[j].type;
1219 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1221 b = BHAMB(nbfp, ntypes, tpi, tpj);
1222 if (b > fr->bham_b_max)
1226 if ((b < bmin) || (bmin == -1))
1236 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1237 bmin, fr->bham_b_max);
1241 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1242 t_forcerec *fr, real rtab,
1243 const t_commrec *cr,
1244 const char *tabfn, char *eg1, char *eg2,
1254 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1259 sprintf(buf, "%s", tabfn);
1262 /* Append the two energy group names */
1263 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1264 eg1, eg2, ftp2ext(efXVG));
1266 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1267 /* Copy the contents of the table to separate coulomb and LJ tables too,
1268 * to improve cache performance.
1270 /* For performance reasons we want
1271 * the table data to be aligned to 16-byte. The pointers could be freed
1272 * but currently aren't.
1274 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1275 nbl->table_elec.format = nbl->table_elec_vdw.format;
1276 nbl->table_elec.r = nbl->table_elec_vdw.r;
1277 nbl->table_elec.n = nbl->table_elec_vdw.n;
1278 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1279 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1280 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1281 nbl->table_elec.ninteractions = 1;
1282 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1283 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1285 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1286 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1287 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1288 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1289 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1290 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1291 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1292 nbl->table_vdw.ninteractions = 2;
1293 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1294 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1296 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1298 for (j = 0; j < 4; j++)
1300 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1302 for (j = 0; j < 8; j++)
1304 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1309 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1310 int *ncount, int **count)
1312 const gmx_moltype_t *molt;
1314 int mt, ftype, stride, i, j, tabnr;
1316 for (mt = 0; mt < mtop->nmoltype; mt++)
1318 molt = &mtop->moltype[mt];
1319 for (ftype = 0; ftype < F_NRE; ftype++)
1321 if (ftype == ftype1 || ftype == ftype2)
1323 il = &molt->ilist[ftype];
1324 stride = 1 + NRAL(ftype);
1325 for (i = 0; i < il->nr; i += stride)
1327 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1330 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1332 if (tabnr >= *ncount)
1334 srenew(*count, tabnr+1);
1335 for (j = *ncount; j < tabnr+1; j++)
1348 static bondedtable_t *make_bonded_tables(FILE *fplog,
1349 int ftype1, int ftype2,
1350 const gmx_mtop_t *mtop,
1351 const char *basefn, const char *tabext)
1353 int i, ncount, *count;
1361 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1366 for (i = 0; i < ncount; i++)
1370 sprintf(tabfn, "%s", basefn);
1371 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1372 tabext, i, ftp2ext(efXVG));
1373 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1382 void forcerec_set_ranges(t_forcerec *fr,
1383 int ncg_home, int ncg_force,
1385 int natoms_force_constr, int natoms_f_novirsum)
1390 /* fr->ncg_force is unused in the standard code,
1391 * but it can be useful for modified code dealing with charge groups.
1393 fr->ncg_force = ncg_force;
1394 fr->natoms_force = natoms_force;
1395 fr->natoms_force_constr = natoms_force_constr;
1397 if (fr->natoms_force_constr > fr->nalloc_force)
1399 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1403 srenew(fr->f_twin, fr->nalloc_force);
1407 if (fr->bF_NoVirSum)
1409 fr->f_novirsum_n = natoms_f_novirsum;
1410 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1412 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1413 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1418 fr->f_novirsum_n = 0;
1422 static real cutoff_inf(real cutoff)
1426 cutoff = GMX_CUTOFF_INF;
1432 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1433 t_forcerec *fr, const t_inputrec *ir,
1434 const char *tabfn, const gmx_mtop_t *mtop,
1442 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1446 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1448 sprintf(buf, "%s", tabfn);
1449 for (i = 0; i < ir->adress->n_tf_grps; i++)
1451 j = ir->adress->tf_table_index[i]; /* get energy group index */
1452 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1453 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1456 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1458 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1463 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1470 ir->rcoulomb == 0 &&
1472 ir->ePBC == epbcNONE &&
1473 ir->vdwtype == evdwCUT &&
1474 ir->coulombtype == eelCUT &&
1475 ir->efep == efepNO &&
1476 (ir->implicit_solvent == eisNO ||
1477 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1478 ir->gb_algorithm == egbHCT ||
1479 ir->gb_algorithm == egbOBC))) &&
1480 getenv("GMX_NO_ALLVSALL") == NULL
1483 if (bAllvsAll && ir->opts.ngener > 1)
1485 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1491 fprintf(stderr, "\n%s\n", note);
1495 fprintf(fp, "\n%s\n", note);
1501 if (bAllvsAll && fp && MASTER(cr))
1503 fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1510 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1514 /* These thread local data structures are used for bondeds only */
1515 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1517 if (fr->nthreads > 1)
1519 snew(fr->f_t, fr->nthreads);
1520 /* Thread 0 uses the global force and energy arrays */
1521 for (t = 1; t < fr->nthreads; t++)
1523 fr->f_t[t].f = NULL;
1524 fr->f_t[t].f_nalloc = 0;
1525 snew(fr->f_t[t].fshift, SHIFTS);
1526 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1527 for (i = 0; i < egNR; i++)
1529 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1536 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1540 *kernel_type = nbnxnk4x4_PlainC;
1541 *ewald_excl = ewaldexclTable;
1543 #ifdef GMX_NBNXN_SIMD
1545 #ifdef GMX_NBNXN_SIMD_4XN
1546 *kernel_type = nbnxnk4xN_SIMD_4xN;
1548 #ifdef GMX_NBNXN_SIMD_2XNN
1549 /* We expect the 2xNN kernels to be faster in most cases */
1550 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1553 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1554 if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1556 /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1557 * 10% with HT, 50% without HT, but extra zeros interactions
1558 * can compensate. As we currently don't detect the actual use
1559 * of HT, switch to 4x8 to avoid a potential performance hit.
1561 *kernel_type = nbnxnk4xN_SIMD_4xN;
1564 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1566 #ifdef GMX_NBNXN_SIMD_4XN
1567 *kernel_type = nbnxnk4xN_SIMD_4xN;
1569 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1572 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1574 #ifdef GMX_NBNXN_SIMD_2XNN
1575 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1577 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1581 /* Analytical Ewald exclusion correction is only an option in
1582 * the SIMD kernel. On BlueGene/Q, this is faster regardless
1583 * of precision. In single precision, this is faster on
1584 * Bulldozer, and slightly faster on Sandy Bridge.
1586 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1587 *ewald_excl = ewaldexclAnalytical;
1589 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1591 *ewald_excl = ewaldexclTable;
1593 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1595 *ewald_excl = ewaldexclAnalytical;
1599 #endif /* GMX_NBNXN_SIMD */
1603 const char *lookup_nbnxn_kernel_name(int kernel_type)
1605 const char *returnvalue = NULL;
1606 switch (kernel_type)
1609 returnvalue = "not set";
1611 case nbnxnk4x4_PlainC:
1612 returnvalue = "plain C";
1614 case nbnxnk4xN_SIMD_4xN:
1615 case nbnxnk4xN_SIMD_2xNN:
1616 #ifdef GMX_NBNXN_SIMD
1618 /* We have x86 SSE2 compatible SIMD */
1619 #ifdef GMX_X86_AVX_128_FMA
1620 returnvalue = "AVX-128-FMA";
1622 #if defined GMX_X86_AVX_256 || defined __AVX__
1623 /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1624 * on compiler flags. As we use nearly identical intrinsics,
1625 * compiling for AVX without an AVX macros effectively results
1627 * For gcc we check for __AVX__
1628 * At least a check for icc should be added (if there is a macro)
1630 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1631 returnvalue = "AVX-256";
1633 returnvalue = "AVX-128";
1636 #ifdef GMX_X86_SSE4_1
1637 returnvalue = "SSE4.1";
1639 returnvalue = "SSE2";
1643 #else /* GMX_X86_SSE2 */
1644 /* not GMX_X86_SSE2, but other SIMD */
1645 returnvalue = "SIMD";
1646 #endif /* GMX_X86_SSE2 */
1647 #else /* GMX_NBNXN_SIMD */
1648 returnvalue = "not available";
1649 #endif /* GMX_NBNXN_SIMD */
1651 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1652 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1656 gmx_fatal(FARGS, "Illegal kernel type selected");
1663 static void pick_nbnxn_kernel(FILE *fp,
1664 const t_commrec *cr,
1665 gmx_bool use_cpu_acceleration,
1667 gmx_bool bEmulateGPU,
1668 const t_inputrec *ir,
1671 gmx_bool bDoNonbonded)
1673 assert(kernel_type);
1675 *kernel_type = nbnxnkNotSet;
1676 *ewald_excl = ewaldexclTable;
1680 *kernel_type = nbnxnk8x8x8_PlainC;
1684 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1689 *kernel_type = nbnxnk8x8x8_CUDA;
1692 if (*kernel_type == nbnxnkNotSet)
1694 if (use_cpu_acceleration)
1696 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1700 *kernel_type = nbnxnk4x4_PlainC;
1704 if (bDoNonbonded && fp != NULL)
1706 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1707 lookup_nbnxn_kernel_name(*kernel_type),
1708 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1709 nbnxn_kernel_to_cj_size(*kernel_type));
1713 static void pick_nbnxn_resources(const t_commrec *cr,
1714 const gmx_hw_info_t *hwinfo,
1715 gmx_bool bDoNonbonded,
1717 gmx_bool *bEmulateGPU,
1718 const gmx_gpu_opt_t *gpu_opt)
1720 gmx_bool bEmulateGPUEnvVarSet;
1721 char gpu_err_str[STRLEN];
1725 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1727 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1728 * GPUs (currently) only handle non-bonded calculations, we will
1729 * automatically switch to emulation if non-bonded calculations are
1730 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1731 * way to turn off GPU initialization, data movement, and cleanup.
1733 * GPU emulation can be useful to assess the performance one can expect by
1734 * adding GPU(s) to the machine. The conditional below allows this even
1735 * if mdrun is compiled without GPU acceleration support.
1736 * Note that you should freezing the system as otherwise it will explode.
1738 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1740 gpu_opt->ncuda_dev_use > 0));
1742 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1744 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1746 /* Each PP node will use the intra-node id-th device from the
1747 * list of detected/selected GPUs. */
1748 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1749 &hwinfo->gpu_info, gpu_opt))
1751 /* At this point the init should never fail as we made sure that
1752 * we have all the GPUs we need. If it still does, we'll bail. */
1753 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1755 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1756 cr->rank_pp_intranode),
1760 /* Here we actually turn on hardware GPU acceleration */
1765 gmx_bool uses_simple_tables(int cutoff_scheme,
1766 nonbonded_verlet_t *nbv,
1769 gmx_bool bUsesSimpleTables = TRUE;
1772 switch (cutoff_scheme)
1775 bUsesSimpleTables = TRUE;
1778 assert(NULL != nbv && NULL != nbv->grp);
1779 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1780 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1783 gmx_incons("unimplemented");
1785 return bUsesSimpleTables;
1788 static void init_ewald_f_table(interaction_const_t *ic,
1789 gmx_bool bUsesSimpleTables,
1794 if (bUsesSimpleTables)
1796 /* With a spacing of 0.0005 we are at the force summation accuracy
1797 * for the SSE kernels for "normal" atomistic simulations.
1799 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1802 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1803 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1807 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1808 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1809 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1812 sfree_aligned(ic->tabq_coul_FDV0);
1813 sfree_aligned(ic->tabq_coul_F);
1814 sfree_aligned(ic->tabq_coul_V);
1816 /* Create the original table data in FDV0 */
1817 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1818 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1819 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1820 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1821 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1824 void init_interaction_const_tables(FILE *fp,
1825 interaction_const_t *ic,
1826 gmx_bool bUsesSimpleTables,
1831 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1833 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1837 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1838 1/ic->tabq_scale, ic->tabq_size);
1843 static void init_interaction_const(FILE *fp,
1844 const t_commrec gmx_unused *cr,
1845 interaction_const_t **interaction_const,
1846 const t_forcerec *fr,
1849 interaction_const_t *ic;
1850 gmx_bool bUsesSimpleTables = TRUE;
1854 /* Just allocate something so we can free it */
1855 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1856 snew_aligned(ic->tabq_coul_F, 16, 32);
1857 snew_aligned(ic->tabq_coul_V, 16, 32);
1859 ic->rlist = fr->rlist;
1860 ic->rlistlong = fr->rlistlong;
1863 ic->rvdw = fr->rvdw;
1864 if (fr->vdw_modifier == eintmodPOTSHIFT)
1866 ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1873 /* Electrostatics */
1874 ic->eeltype = fr->eeltype;
1875 ic->rcoulomb = fr->rcoulomb;
1876 ic->epsilon_r = fr->epsilon_r;
1877 ic->epsfac = fr->epsfac;
1880 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
1881 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1883 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1885 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1892 /* Reaction-field */
1893 if (EEL_RF(ic->eeltype))
1895 ic->epsilon_rf = fr->epsilon_rf;
1896 ic->k_rf = fr->k_rf;
1897 ic->c_rf = fr->c_rf;
1901 /* For plain cut-off we might use the reaction-field kernels */
1902 ic->epsilon_rf = ic->epsilon_r;
1904 if (fr->coulomb_modifier == eintmodPOTSHIFT)
1906 ic->c_rf = 1/ic->rcoulomb;
1916 fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1917 sqr(ic->sh_invrc6), ic->sh_invrc6);
1918 if (ic->eeltype == eelCUT)
1920 fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1922 else if (EEL_PME(ic->eeltype))
1924 fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1929 *interaction_const = ic;
1931 if (fr->nbv != NULL && fr->nbv->bUseGPU)
1933 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1935 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1936 * also sharing texture references. To keep the code simple, we don't
1937 * treat texture references as shared resources, but this means that
1938 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
1939 * Hence, to ensure that the non-bonded kernels don't start before all
1940 * texture binding operations are finished, we need to wait for all ranks
1941 * to arrive here before continuing.
1943 * Note that we could omit this barrier if GPUs are not shared (or
1944 * texture objects are used), but as this is initialization code, there
1945 * is not point in complicating things.
1947 #ifdef GMX_THREAD_MPI
1952 #endif /* GMX_THREAD_MPI */
1955 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1956 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1959 static void init_nb_verlet(FILE *fp,
1960 nonbonded_verlet_t **nb_verlet,
1961 const t_inputrec *ir,
1962 const t_forcerec *fr,
1963 const t_commrec *cr,
1964 const char *nbpu_opt)
1966 nonbonded_verlet_t *nbv;
1969 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
1971 nbnxn_alloc_t *nb_alloc;
1972 nbnxn_free_t *nb_free;
1976 pick_nbnxn_resources(cr, fr->hwinfo,
1984 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1985 for (i = 0; i < nbv->ngrp; i++)
1987 nbv->grp[i].nbl_lists.nnbl = 0;
1988 nbv->grp[i].nbat = NULL;
1989 nbv->grp[i].kernel_type = nbnxnkNotSet;
1991 if (i == 0) /* local */
1993 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
1994 nbv->bUseGPU, bEmulateGPU, ir,
1995 &nbv->grp[i].kernel_type,
1996 &nbv->grp[i].ewald_excl,
1999 else /* non-local */
2001 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2003 /* Use GPU for local, select a CPU kernel for non-local */
2004 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
2006 &nbv->grp[i].kernel_type,
2007 &nbv->grp[i].ewald_excl,
2010 bHybridGPURun = TRUE;
2014 /* Use the same kernel for local and non-local interactions */
2015 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2016 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2023 /* init the NxN GPU data; the last argument tells whether we'll have
2024 * both local and non-local NB calculation on GPU */
2025 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2026 &fr->hwinfo->gpu_info, fr->gpu_opt,
2027 cr->rank_pp_intranode,
2028 (nbv->ngrp > 1) && !bHybridGPURun);
2030 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2034 nbv->min_ci_balanced = strtol(env, &end, 10);
2035 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2037 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2042 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2043 nbv->min_ci_balanced);
2048 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2051 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2052 nbv->min_ci_balanced);
2058 nbv->min_ci_balanced = 0;
2063 nbnxn_init_search(&nbv->nbs,
2064 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2065 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2066 gmx_omp_nthreads_get(emntNonbonded));
2068 for (i = 0; i < nbv->ngrp; i++)
2070 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2072 nb_alloc = &pmalloc;
2081 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2082 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2083 /* 8x8x8 "non-simple" lists are ATM always combined */
2084 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2088 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2090 snew(nbv->grp[i].nbat, 1);
2091 nbnxn_atomdata_init(fp,
2093 nbv->grp[i].kernel_type,
2094 fr->ntype, fr->nbfp,
2096 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2101 nbv->grp[i].nbat = nbv->grp[0].nbat;
2106 void init_forcerec(FILE *fp,
2107 const output_env_t oenv,
2110 const t_inputrec *ir,
2111 const gmx_mtop_t *mtop,
2112 const t_commrec *cr,
2118 const char *nbpu_opt,
2119 gmx_bool bNoSolvOpt,
2122 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2127 gmx_bool bGenericKernelOnly;
2128 gmx_bool bTab, bSep14tab, bNormalnblists;
2130 int *nm_ind, egp_flags;
2132 if (fr->hwinfo == NULL)
2134 /* Detect hardware, gather information.
2135 * In mdrun, hwinfo has already been set before calling init_forcerec.
2136 * Here we ignore GPUs, as tools will not use them anyhow.
2138 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2141 /* By default we turn acceleration on, but it might be turned off further down... */
2142 fr->use_cpu_acceleration = TRUE;
2144 fr->bDomDec = DOMAINDECOMP(cr);
2146 natoms = mtop->natoms;
2148 if (check_box(ir->ePBC, box))
2150 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2153 /* Test particle insertion ? */
2156 /* Set to the size of the molecule to be inserted (the last one) */
2157 /* Because of old style topologies, we have to use the last cg
2158 * instead of the last molecule type.
2160 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2161 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2162 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2164 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2172 /* Copy AdResS parameters */
2175 fr->adress_type = ir->adress->type;
2176 fr->adress_const_wf = ir->adress->const_wf;
2177 fr->adress_ex_width = ir->adress->ex_width;
2178 fr->adress_hy_width = ir->adress->hy_width;
2179 fr->adress_icor = ir->adress->icor;
2180 fr->adress_site = ir->adress->site;
2181 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2182 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2185 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2186 for (i = 0; i < ir->adress->n_energy_grps; i++)
2188 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2191 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2192 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2193 for (i = 0; i < fr->n_adress_tf_grps; i++)
2195 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2197 copy_rvec(ir->adress->refs, fr->adress_refs);
2201 fr->adress_type = eAdressOff;
2202 fr->adress_do_hybridpairs = FALSE;
2205 /* Copy the user determined parameters */
2206 fr->userint1 = ir->userint1;
2207 fr->userint2 = ir->userint2;
2208 fr->userint3 = ir->userint3;
2209 fr->userint4 = ir->userint4;
2210 fr->userreal1 = ir->userreal1;
2211 fr->userreal2 = ir->userreal2;
2212 fr->userreal3 = ir->userreal3;
2213 fr->userreal4 = ir->userreal4;
2216 fr->fc_stepsize = ir->fc_stepsize;
2219 fr->efep = ir->efep;
2220 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2221 if (ir->fepvals->bScCoul)
2223 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2224 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2228 fr->sc_alphacoul = 0;
2229 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2231 fr->sc_power = ir->fepvals->sc_power;
2232 fr->sc_r_power = ir->fepvals->sc_r_power;
2233 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2235 env = getenv("GMX_SCSIGMA_MIN");
2239 sscanf(env, "%lf", &dbl);
2240 fr->sc_sigma6_min = pow(dbl, 6);
2243 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2247 fr->bNonbonded = TRUE;
2248 if (getenv("GMX_NO_NONBONDED") != NULL)
2250 /* turn off non-bonded calculations */
2251 fr->bNonbonded = FALSE;
2252 md_print_warn(cr, fp,
2253 "Found environment variable GMX_NO_NONBONDED.\n"
2254 "Disabling nonbonded calculations.\n");
2257 bGenericKernelOnly = FALSE;
2259 /* We now check in the NS code whether a particular combination of interactions
2260 * can be used with water optimization, and disable it if that is not the case.
2263 if (getenv("GMX_NB_GENERIC") != NULL)
2268 "Found environment variable GMX_NB_GENERIC.\n"
2269 "Disabling all interaction-specific nonbonded kernels, will only\n"
2270 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2272 bGenericKernelOnly = TRUE;
2275 if (bGenericKernelOnly == TRUE)
2280 if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2282 fr->use_cpu_acceleration = FALSE;
2286 "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2287 "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2291 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2293 /* Check if we can/should do all-vs-all kernels */
2294 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2295 fr->AllvsAll_work = NULL;
2296 fr->AllvsAll_workgb = NULL;
2298 /* All-vs-all kernels have not been implemented in 4.6, and
2299 * the SIMD group kernels are also buggy in this case. Non-accelerated
2300 * group kernels are OK. See Redmine #1249. */
2303 fr->bAllvsAll = FALSE;
2304 fr->use_cpu_acceleration = FALSE;
2308 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2309 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2310 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2311 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2312 "we are proceeding by disabling all CPU architecture-specific\n"
2313 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2314 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2318 /* Neighbour searching stuff */
2319 fr->cutoff_scheme = ir->cutoff_scheme;
2320 fr->bGrid = (ir->ns_type == ensGRID);
2321 fr->ePBC = ir->ePBC;
2323 /* Determine if we will do PBC for distances in bonded interactions */
2324 if (fr->ePBC == epbcNONE)
2326 fr->bMolPBC = FALSE;
2330 if (!DOMAINDECOMP(cr))
2332 /* The group cut-off scheme and SHAKE assume charge groups
2333 * are whole, but not using molpbc is faster in most cases.
2335 if (fr->cutoff_scheme == ecutsGROUP ||
2336 (ir->eConstrAlg == econtSHAKE &&
2337 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2338 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2340 fr->bMolPBC = ir->bPeriodicMols;
2345 if (getenv("GMX_USE_GRAPH") != NULL)
2347 fr->bMolPBC = FALSE;
2350 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2357 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2360 fr->bGB = (ir->implicit_solvent == eisGBSA);
2362 fr->rc_scaling = ir->refcoord_scaling;
2363 copy_rvec(ir->posres_com, fr->posres_com);
2364 copy_rvec(ir->posres_comB, fr->posres_comB);
2365 fr->rlist = cutoff_inf(ir->rlist);
2366 fr->rlistlong = cutoff_inf(ir->rlistlong);
2367 fr->eeltype = ir->coulombtype;
2368 fr->vdwtype = ir->vdwtype;
2369 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2371 fr->coulomb_modifier = ir->coulomb_modifier;
2372 fr->vdw_modifier = ir->vdw_modifier;
2374 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2375 switch (fr->eeltype)
2378 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2384 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2388 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2389 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2398 case eelPMEUSERSWITCH:
2399 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2404 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2408 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2412 /* Vdw: Translate from mdp settings to kernel format */
2413 switch (fr->vdwtype)
2419 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2423 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2430 case evdwENCADSHIFT:
2431 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2435 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2439 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2440 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2441 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2443 fr->bTwinRange = fr->rlistlong > fr->rlist;
2444 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2446 fr->reppow = mtop->ffparams.reppow;
2448 if (ir->cutoff_scheme == ecutsGROUP)
2450 fr->bvdwtab = (fr->vdwtype != evdwCUT ||
2451 !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2452 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2453 fr->bcoultab = !(fr->eeltype == eelCUT ||
2454 fr->eeltype == eelEWALD ||
2455 fr->eeltype == eelPME ||
2456 fr->eeltype == eelRF ||
2457 fr->eeltype == eelRF_ZERO);
2459 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2460 * going to be faster to tabulate the interaction than calling the generic kernel.
2462 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2464 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2466 fr->bcoultab = TRUE;
2469 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2470 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2471 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2472 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2474 if (fr->rcoulomb != fr->rvdw)
2476 fr->bcoultab = TRUE;
2480 if (getenv("GMX_REQUIRE_TABLES"))
2483 fr->bcoultab = TRUE;
2488 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2489 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2492 if (fr->bvdwtab == TRUE)
2494 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2495 fr->nbkernel_vdw_modifier = eintmodNONE;
2497 if (fr->bcoultab == TRUE)
2499 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2500 fr->nbkernel_elec_modifier = eintmodNONE;
2504 if (ir->cutoff_scheme == ecutsVERLET)
2506 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2508 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2510 fr->bvdwtab = FALSE;
2511 fr->bcoultab = FALSE;
2514 /* Tables are used for direct ewald sum */
2517 if (EEL_PME(ir->coulombtype))
2521 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2523 if (ir->coulombtype == eelP3M_AD)
2525 please_cite(fp, "Hockney1988");
2526 please_cite(fp, "Ballenegger2012");
2530 please_cite(fp, "Essmann95a");
2533 if (ir->ewald_geometry == eewg3DC)
2537 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2539 please_cite(fp, "In-Chul99a");
2542 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2543 init_ewald_tab(&(fr->ewald_table), ir, fp);
2546 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2547 1/fr->ewaldcoeff_q);
2551 if (EVDW_PME(ir->vdwtype))
2555 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2557 please_cite(fp, "Essman95a");
2558 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2561 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2562 1/fr->ewaldcoeff_lj);
2566 /* Electrostatics */
2567 fr->epsilon_r = ir->epsilon_r;
2568 fr->epsilon_rf = ir->epsilon_rf;
2569 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2570 fr->rcoulomb_switch = ir->rcoulomb_switch;
2571 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2573 /* Parameters for generalized RF */
2577 if (fr->eeltype == eelGRF)
2579 init_generalized_rf(fp, mtop, ir, fr);
2582 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2583 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2584 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2585 IR_ELEC_FIELD(*ir) ||
2586 (fr->adress_icor != eAdressICOff)
2589 if (fr->cutoff_scheme == ecutsGROUP &&
2590 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2592 /* Count the total number of charge groups */
2593 fr->cg_nalloc = ncg_mtop(mtop);
2594 srenew(fr->cg_cm, fr->cg_nalloc);
2596 if (fr->shift_vec == NULL)
2598 snew(fr->shift_vec, SHIFTS);
2601 if (fr->fshift == NULL)
2603 snew(fr->fshift, SHIFTS);
2606 if (fr->nbfp == NULL)
2608 fr->ntype = mtop->ffparams.atnr;
2609 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2612 /* Copy the energy group exclusions */
2613 fr->egp_flags = ir->opts.egp_flags;
2615 /* Van der Waals stuff */
2616 fr->rvdw = cutoff_inf(ir->rvdw);
2617 fr->rvdw_switch = ir->rvdw_switch;
2618 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2620 if (fr->rvdw_switch >= fr->rvdw)
2622 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2623 fr->rvdw_switch, fr->rvdw);
2627 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2628 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2629 fr->rvdw_switch, fr->rvdw);
2633 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2635 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2638 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2640 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2645 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2646 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2649 fr->eDispCorr = ir->eDispCorr;
2650 if (ir->eDispCorr != edispcNO)
2652 set_avcsixtwelve(fp, fr, mtop);
2657 set_bham_b_max(fp, fr, mtop);
2660 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2662 /* Copy the GBSA data (radius, volume and surftens for each
2663 * atomtype) from the topology atomtype section to forcerec.
2665 snew(fr->atype_radius, fr->ntype);
2666 snew(fr->atype_vol, fr->ntype);
2667 snew(fr->atype_surftens, fr->ntype);
2668 snew(fr->atype_gb_radius, fr->ntype);
2669 snew(fr->atype_S_hct, fr->ntype);
2671 if (mtop->atomtypes.nr > 0)
2673 for (i = 0; i < fr->ntype; i++)
2675 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2677 for (i = 0; i < fr->ntype; i++)
2679 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2681 for (i = 0; i < fr->ntype; i++)
2683 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2685 for (i = 0; i < fr->ntype; i++)
2687 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2689 for (i = 0; i < fr->ntype; i++)
2691 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2695 /* Generate the GB table if needed */
2699 fr->gbtabscale = 2000;
2701 fr->gbtabscale = 500;
2705 fr->gbtab = make_gb_table(oenv, fr);
2707 init_gb(&fr->born, cr, fr, ir, mtop, ir->gb_algorithm);
2709 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2710 if (!DOMAINDECOMP(cr))
2712 make_local_gb(cr, fr->born, ir->gb_algorithm);
2716 /* Set the charge scaling */
2717 if (fr->epsilon_r != 0)
2719 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2723 /* eps = 0 is infinite dieletric: no coulomb interactions */
2727 /* Reaction field constants */
2728 if (EEL_RF(fr->eeltype))
2730 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2731 fr->rcoulomb, fr->temp, fr->zsquare, box,
2732 &fr->kappa, &fr->k_rf, &fr->c_rf);
2735 /*This now calculates sum for q and c6*/
2736 set_chargesum(fp, fr, mtop);
2738 /* if we are using LR electrostatics, and they are tabulated,
2739 * the tables will contain modified coulomb interactions.
2740 * Since we want to use the non-shifted ones for 1-4
2741 * coulombic interactions, we must have an extra set of tables.
2744 /* Construct tables.
2745 * A little unnecessary to make both vdw and coul tables sometimes,
2746 * but what the heck... */
2748 bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2750 bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2751 fr->bBHAM || fr->bEwald) &&
2752 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2753 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2754 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2756 negp_pp = ir->opts.ngener - ir->nwall;
2760 bNormalnblists = TRUE;
2765 bNormalnblists = (ir->eDispCorr != edispcNO);
2766 for (egi = 0; egi < negp_pp; egi++)
2768 for (egj = egi; egj < negp_pp; egj++)
2770 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2771 if (!(egp_flags & EGP_EXCL))
2773 if (egp_flags & EGP_TABLE)
2779 bNormalnblists = TRUE;
2786 fr->nnblists = negptable + 1;
2790 fr->nnblists = negptable;
2792 if (fr->nnblists > 1)
2794 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2803 snew(fr->nblists, fr->nnblists);
2805 /* This code automatically gives table length tabext without cut-off's,
2806 * in that case grompp should already have checked that we do not need
2807 * normal tables and we only generate tables for 1-4 interactions.
2809 rtab = ir->rlistlong + ir->tabext;
2813 /* make tables for ordinary interactions */
2816 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2819 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2823 fr->tab14 = fr->nblists[0].table_elec_vdw;
2833 /* Read the special tables for certain energy group pairs */
2834 nm_ind = mtop->groups.grps[egcENER].nm_ind;
2835 for (egi = 0; egi < negp_pp; egi++)
2837 for (egj = egi; egj < negp_pp; egj++)
2839 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2840 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2842 nbl = &(fr->nblists[m]);
2843 if (fr->nnblists > 1)
2845 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2847 /* Read the table file with the two energy groups names appended */
2848 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2849 *mtop->groups.grpname[nm_ind[egi]],
2850 *mtop->groups.grpname[nm_ind[egj]],
2854 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2855 *mtop->groups.grpname[nm_ind[egi]],
2856 *mtop->groups.grpname[nm_ind[egj]],
2857 &fr->nblists[fr->nnblists/2+m]);
2861 else if (fr->nnblists > 1)
2863 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2871 /* generate extra tables with plain Coulomb for 1-4 interactions only */
2872 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2873 GMX_MAKETABLES_14ONLY);
2876 /* Read AdResS Thermo Force table if needed */
2877 if (fr->adress_icor == eAdressICThermoForce)
2879 /* old todo replace */
2881 if (ir->adress->n_tf_grps > 0)
2883 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2888 /* load the default table */
2889 snew(fr->atf_tabs, 1);
2890 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2895 fr->nwall = ir->nwall;
2896 if (ir->nwall && ir->wall_type == ewtTABLE)
2898 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2903 fcd->bondtab = make_bonded_tables(fp,
2904 F_TABBONDS, F_TABBONDSNC,
2906 fcd->angletab = make_bonded_tables(fp,
2909 fcd->dihtab = make_bonded_tables(fp,
2917 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2921 /* QM/MM initialization if requested
2925 fprintf(stderr, "QM/MM calculation requested.\n");
2928 fr->bQMMM = ir->bQMMM;
2929 fr->qr = mk_QMMMrec();
2931 /* Set all the static charge group info */
2932 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2933 &fr->bExcl_IntraCGAll_InterCGNone);
2934 if (DOMAINDECOMP(cr))
2940 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2943 if (!DOMAINDECOMP(cr))
2945 /* When using particle decomposition, the effect of the second argument,
2946 * which sets fr->hcg, is corrected later in do_md and init_em.
2948 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2949 mtop->natoms, mtop->natoms, mtop->natoms);
2952 fr->print_force = print_force;
2955 /* coarse load balancing vars */
2960 /* Initialize neighbor search */
2961 init_ns(fp, cr, &fr->ns, fr, mtop);
2963 if (cr->duty & DUTY_PP)
2965 gmx_nonbonded_setup(fr, bGenericKernelOnly);
2969 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2974 /* Initialize the thread working data for bonded interactions */
2975 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2977 snew(fr->excl_load, fr->nthreads+1);
2979 if (fr->cutoff_scheme == ecutsVERLET)
2981 if (ir->rcoulomb != ir->rvdw)
2983 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2986 init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2989 /* fr->ic is used both by verlet and group kernels (to some extent) now */
2990 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
2992 if (ir->eDispCorr != edispcNO)
2994 calc_enervirdiff(fp, ir->eDispCorr, fr);
2998 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2999 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3000 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3002 void pr_forcerec(FILE *fp, t_forcerec *fr)
3006 pr_real(fp, fr->rlist);
3007 pr_real(fp, fr->rcoulomb);
3008 pr_real(fp, fr->fudgeQQ);
3009 pr_bool(fp, fr->bGrid);
3010 pr_bool(fp, fr->bTwinRange);
3011 /*pr_int(fp,fr->cg0);
3012 pr_int(fp,fr->hcg);*/
3013 for (i = 0; i < fr->nnblists; i++)
3015 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3017 pr_real(fp, fr->rcoulomb_switch);
3018 pr_real(fp, fr->rcoulomb);
3023 void forcerec_set_excl_load(t_forcerec *fr,
3024 const gmx_localtop_t *top, const t_commrec *cr)
3027 int t, i, j, ntot, n, ntarget;
3029 if (cr != NULL && PARTDECOMP(cr))
3031 /* No OpenMP with particle decomposition */
3039 ind = top->excls.index;
3043 for (i = 0; i < top->excls.nr; i++)
3045 for (j = ind[i]; j < ind[i+1]; j++)
3054 fr->excl_load[0] = 0;
3057 for (t = 1; t <= fr->nthreads; t++)
3059 ntarget = (ntot*t)/fr->nthreads;
3060 while (i < top->excls.nr && n < ntarget)
3062 for (j = ind[i]; j < ind[i+1]; j++)
3071 fr->excl_load[t] = i;