2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "types/commrec.h"
48 #include "gromacs/math/utilities.h"
50 #include "gromacs/utility/smalloc.h"
52 #include "gmx_fatal.h"
56 #include "nonbonded.h"
65 #include "md_support.h"
66 #include "md_logging.h"
70 #include "mtop_util.h"
71 #include "nbnxn_simd.h"
72 #include "nbnxn_search.h"
73 #include "nbnxn_atomdata.h"
74 #include "nbnxn_consts.h"
75 #include "gmx_omp_nthreads.h"
76 #include "gmx_detect_hardware.h"
79 #include "types/nbnxn_cuda_types_ext.h"
80 #include "gpu_utils.h"
81 #include "nbnxn_cuda_data_mgmt.h"
82 #include "pmalloc_cuda.h"
84 t_forcerec *mk_forcerec(void)
94 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
98 for (i = 0; (i < atnr); i++)
100 for (j = 0; (j < atnr); j++)
102 fprintf(fp, "%2d - %2d", i, j);
105 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
106 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
110 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
111 C12(nbfp, atnr, i, j)/12.0);
118 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
126 snew(nbfp, 3*atnr*atnr);
127 for (i = k = 0; (i < atnr); i++)
129 for (j = 0; (j < atnr); j++, k++)
131 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
132 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
133 /* nbfp now includes the 6.0 derivative prefactor */
134 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
140 snew(nbfp, 2*atnr*atnr);
141 for (i = k = 0; (i < atnr); i++)
143 for (j = 0; (j < atnr); j++, k++)
145 /* nbfp now includes the 6.0/12.0 derivative prefactors */
146 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
147 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
155 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
158 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
161 /* For LJ-PME simulations, we correct the energies with the reciprocal space
162 * inside of the cut-off. To do this the non-bonded kernels needs to have
163 * access to the C6-values used on the reciprocal grid in pme.c
167 snew(grid, 2*atnr*atnr);
168 for (i = k = 0; (i < atnr); i++)
170 for (j = 0; (j < atnr); j++, k++)
172 c6i = idef->iparams[i*(atnr+1)].lj.c6;
173 c12i = idef->iparams[i*(atnr+1)].lj.c12;
174 c6j = idef->iparams[j*(atnr+1)].lj.c6;
175 c12j = idef->iparams[j*(atnr+1)].lj.c12;
176 c6 = sqrt(c6i * c6j);
177 if (fr->ljpme_combination_rule == eljpmeLB
178 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
180 sigmai = pow(c12i / c6i, 1.0/6.0);
181 sigmaj = pow(c12j / c6j, 1.0/6.0);
182 epsi = c6i * c6i / c12i;
183 epsj = c6j * c6j / c12j;
184 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
186 /* Store the elements at the same relative positions as C6 in nbfp in order
187 * to simplify access in the kernels
189 grid[2*(atnr*i+j)] = c6*6.0;
195 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
199 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
203 snew(nbfp, 2*atnr*atnr);
204 for (i = 0; i < atnr; ++i)
206 for (j = 0; j < atnr; ++j)
208 c6i = idef->iparams[i*(atnr+1)].lj.c6;
209 c12i = idef->iparams[i*(atnr+1)].lj.c12;
210 c6j = idef->iparams[j*(atnr+1)].lj.c6;
211 c12j = idef->iparams[j*(atnr+1)].lj.c12;
212 c6 = sqrt(c6i * c6j);
213 c12 = sqrt(c12i * c12j);
214 if (comb_rule == eCOMB_ARITHMETIC
215 && !gmx_numzero(c6) && !gmx_numzero(c12))
217 sigmai = pow(c12i / c6i, 1.0/6.0);
218 sigmaj = pow(c12j / c6j, 1.0/6.0);
219 epsi = c6i * c6i / c12i;
220 epsj = c6j * c6j / c12j;
221 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
222 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
224 C6(nbfp, atnr, i, j) = c6*6.0;
225 C12(nbfp, atnr, i, j) = c12*12.0;
231 /* This routine sets fr->solvent_opt to the most common solvent in the
232 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
233 * the fr->solvent_type array with the correct type (or esolNO).
235 * Charge groups that fulfill the conditions but are not identical to the
236 * most common one will be marked as esolNO in the solvent_type array.
238 * TIP3p is identical to SPC for these purposes, so we call it
239 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
241 * NOTE: QM particle should not
242 * become an optimized solvent. Not even if there is only one charge
252 } solvent_parameters_t;
255 check_solvent_cg(const gmx_moltype_t *molt,
258 const unsigned char *qm_grpnr,
259 const t_grps *qm_grps,
261 int *n_solvent_parameters,
262 solvent_parameters_t **solvent_parameters_p,
266 const t_blocka *excl;
273 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
274 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
277 solvent_parameters_t *solvent_parameters;
279 /* We use a list with parameters for each solvent type.
280 * Every time we discover a new molecule that fulfills the basic
281 * conditions for a solvent we compare with the previous entries
282 * in these lists. If the parameters are the same we just increment
283 * the counter for that type, and otherwise we create a new type
284 * based on the current molecule.
286 * Once we've finished going through all molecules we check which
287 * solvent is most common, and mark all those molecules while we
288 * clear the flag on all others.
291 solvent_parameters = *solvent_parameters_p;
293 /* Mark the cg first as non optimized */
296 /* Check if this cg has no exclusions with atoms in other charge groups
297 * and all atoms inside the charge group excluded.
298 * We only have 3 or 4 atom solvent loops.
300 if (GET_CGINFO_EXCL_INTER(cginfo) ||
301 !GET_CGINFO_EXCL_INTRA(cginfo))
306 /* Get the indices of the first atom in this charge group */
307 j0 = molt->cgs.index[cg0];
308 j1 = molt->cgs.index[cg0+1];
310 /* Number of atoms in our molecule */
316 "Moltype '%s': there are %d atoms in this charge group\n",
320 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
323 if (nj < 3 || nj > 4)
328 /* Check if we are doing QM on this group */
330 if (qm_grpnr != NULL)
332 for (j = j0; j < j1 && !qm; j++)
334 qm = (qm_grpnr[j] < qm_grps->nr - 1);
337 /* Cannot use solvent optimization with QM */
343 atom = molt->atoms.atom;
345 /* Still looks like a solvent, time to check parameters */
347 /* If it is perturbed (free energy) we can't use the solvent loops,
348 * so then we just skip to the next molecule.
352 for (j = j0; j < j1 && !perturbed; j++)
354 perturbed = PERTURBED(atom[j]);
362 /* Now it's only a question if the VdW and charge parameters
363 * are OK. Before doing the check we compare and see if they are
364 * identical to a possible previous solvent type.
365 * First we assign the current types and charges.
367 for (j = 0; j < nj; j++)
369 tmp_vdwtype[j] = atom[j0+j].type;
370 tmp_charge[j] = atom[j0+j].q;
373 /* Does it match any previous solvent type? */
374 for (k = 0; k < *n_solvent_parameters; k++)
379 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
380 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
381 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
386 /* Check that types & charges match for all atoms in molecule */
387 for (j = 0; j < nj && match == TRUE; j++)
389 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
393 if (tmp_charge[j] != solvent_parameters[k].charge[j])
400 /* Congratulations! We have a matched solvent.
401 * Flag it with this type for later processing.
404 solvent_parameters[k].count += nmol;
406 /* We are done with this charge group */
411 /* If we get here, we have a tentative new solvent type.
412 * Before we add it we must check that it fulfills the requirements
413 * of the solvent optimized loops. First determine which atoms have
416 for (j = 0; j < nj; j++)
419 tjA = tmp_vdwtype[j];
421 /* Go through all other tpes and see if any have non-zero
422 * VdW parameters when combined with this one.
424 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
426 /* We already checked that the atoms weren't perturbed,
427 * so we only need to check state A now.
431 has_vdw[j] = (has_vdw[j] ||
432 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
433 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
434 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
439 has_vdw[j] = (has_vdw[j] ||
440 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
441 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
446 /* Now we know all we need to make the final check and assignment. */
450 * For this we require thatn all atoms have charge,
451 * the charges on atom 2 & 3 should be the same, and only
452 * atom 1 might have VdW.
454 if (has_vdw[1] == FALSE &&
455 has_vdw[2] == FALSE &&
456 tmp_charge[0] != 0 &&
457 tmp_charge[1] != 0 &&
458 tmp_charge[2] == tmp_charge[1])
460 srenew(solvent_parameters, *n_solvent_parameters+1);
461 solvent_parameters[*n_solvent_parameters].model = esolSPC;
462 solvent_parameters[*n_solvent_parameters].count = nmol;
463 for (k = 0; k < 3; k++)
465 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
466 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
469 *cg_sp = *n_solvent_parameters;
470 (*n_solvent_parameters)++;
475 /* Or could it be a TIP4P?
476 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
477 * Only atom 1 mght have VdW.
479 if (has_vdw[1] == FALSE &&
480 has_vdw[2] == FALSE &&
481 has_vdw[3] == FALSE &&
482 tmp_charge[0] == 0 &&
483 tmp_charge[1] != 0 &&
484 tmp_charge[2] == tmp_charge[1] &&
487 srenew(solvent_parameters, *n_solvent_parameters+1);
488 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
489 solvent_parameters[*n_solvent_parameters].count = nmol;
490 for (k = 0; k < 4; k++)
492 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
493 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
496 *cg_sp = *n_solvent_parameters;
497 (*n_solvent_parameters)++;
501 *solvent_parameters_p = solvent_parameters;
505 check_solvent(FILE * fp,
506 const gmx_mtop_t * mtop,
508 cginfo_mb_t *cginfo_mb)
511 const t_block * mols;
512 const gmx_moltype_t *molt;
513 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
514 int n_solvent_parameters;
515 solvent_parameters_t *solvent_parameters;
521 fprintf(debug, "Going to determine what solvent types we have.\n");
526 n_solvent_parameters = 0;
527 solvent_parameters = NULL;
528 /* Allocate temporary array for solvent type */
529 snew(cg_sp, mtop->nmolblock);
533 for (mb = 0; mb < mtop->nmolblock; mb++)
535 molt = &mtop->moltype[mtop->molblock[mb].type];
537 /* Here we have to loop over all individual molecules
538 * because we need to check for QMMM particles.
540 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
541 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
542 nmol = mtop->molblock[mb].nmol/nmol_ch;
543 for (mol = 0; mol < nmol_ch; mol++)
546 am = mol*cgs->index[cgs->nr];
547 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
549 check_solvent_cg(molt, cg_mol, nmol,
550 mtop->groups.grpnr[egcQMMM] ?
551 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
552 &mtop->groups.grps[egcQMMM],
554 &n_solvent_parameters, &solvent_parameters,
555 cginfo_mb[mb].cginfo[cgm+cg_mol],
556 &cg_sp[mb][cgm+cg_mol]);
559 cg_offset += cgs->nr;
560 at_offset += cgs->index[cgs->nr];
563 /* Puh! We finished going through all charge groups.
564 * Now find the most common solvent model.
567 /* Most common solvent this far */
569 for (i = 0; i < n_solvent_parameters; i++)
572 solvent_parameters[i].count > solvent_parameters[bestsp].count)
580 bestsol = solvent_parameters[bestsp].model;
587 #ifdef DISABLE_WATER_NLIST
592 for (mb = 0; mb < mtop->nmolblock; mb++)
594 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
595 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
596 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
598 if (cg_sp[mb][i] == bestsp)
600 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
605 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
612 if (bestsol != esolNO && fp != NULL)
614 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
616 solvent_parameters[bestsp].count);
619 sfree(solvent_parameters);
620 fr->solvent_opt = bestsol;
624 acNONE = 0, acCONSTRAINT, acSETTLE
627 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
628 t_forcerec *fr, gmx_bool bNoSolvOpt,
629 gmx_bool *bFEP_NonBonded,
630 gmx_bool *bExcl_IntraCGAll_InterCGNone)
633 const t_blocka *excl;
634 const gmx_moltype_t *molt;
635 const gmx_molblock_t *molb;
636 cginfo_mb_t *cginfo_mb;
639 int cg_offset, a_offset, cgm, am;
640 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
644 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
646 ncg_tot = ncg_mtop(mtop);
647 snew(cginfo_mb, mtop->nmolblock);
649 snew(type_VDW, fr->ntype);
650 for (ai = 0; ai < fr->ntype; ai++)
652 type_VDW[ai] = FALSE;
653 for (j = 0; j < fr->ntype; j++)
655 type_VDW[ai] = type_VDW[ai] ||
657 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
658 C12(fr->nbfp, fr->ntype, ai, j) != 0;
662 *bFEP_NonBonded = FALSE;
663 *bExcl_IntraCGAll_InterCGNone = TRUE;
666 snew(bExcl, excl_nalloc);
669 for (mb = 0; mb < mtop->nmolblock; mb++)
671 molb = &mtop->molblock[mb];
672 molt = &mtop->moltype[molb->type];
676 /* Check if the cginfo is identical for all molecules in this block.
677 * If so, we only need an array of the size of one molecule.
678 * Otherwise we make an array of #mol times #cgs per molecule.
682 for (m = 0; m < molb->nmol; m++)
684 am = m*cgs->index[cgs->nr];
685 for (cg = 0; cg < cgs->nr; cg++)
688 a1 = cgs->index[cg+1];
689 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
690 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
694 if (mtop->groups.grpnr[egcQMMM] != NULL)
696 for (ai = a0; ai < a1; ai++)
698 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
699 mtop->groups.grpnr[egcQMMM][a_offset +ai])
708 cginfo_mb[mb].cg_start = cg_offset;
709 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
710 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
711 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
712 cginfo = cginfo_mb[mb].cginfo;
714 /* Set constraints flags for constrained atoms */
715 snew(a_con, molt->atoms.nr);
716 for (ftype = 0; ftype < F_NRE; ftype++)
718 if (interaction_function[ftype].flags & IF_CONSTRAINT)
723 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
727 for (a = 0; a < nral; a++)
729 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
730 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
736 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
739 am = m*cgs->index[cgs->nr];
740 for (cg = 0; cg < cgs->nr; cg++)
743 a1 = cgs->index[cg+1];
745 /* Store the energy group in cginfo */
746 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
747 SET_CGINFO_GID(cginfo[cgm+cg], gid);
749 /* Check the intra/inter charge group exclusions */
750 if (a1-a0 > excl_nalloc)
752 excl_nalloc = a1 - a0;
753 srenew(bExcl, excl_nalloc);
755 /* bExclIntraAll: all intra cg interactions excluded
756 * bExclInter: any inter cg interactions excluded
758 bExclIntraAll = TRUE;
762 bHavePerturbedAtoms = FALSE;
763 for (ai = a0; ai < a1; ai++)
765 /* Check VDW and electrostatic interactions */
766 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
767 type_VDW[molt->atoms.atom[ai].typeB]);
768 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
769 molt->atoms.atom[ai].qB != 0);
771 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
773 /* Clear the exclusion list for atom ai */
774 for (aj = a0; aj < a1; aj++)
776 bExcl[aj-a0] = FALSE;
778 /* Loop over all the exclusions of atom ai */
779 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
782 if (aj < a0 || aj >= a1)
791 /* Check if ai excludes a0 to a1 */
792 for (aj = a0; aj < a1; aj++)
796 bExclIntraAll = FALSE;
803 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
806 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
814 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
818 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
820 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
822 /* The size in cginfo is currently only read with DD */
823 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
827 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
831 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
833 if (bHavePerturbedAtoms && fr->efep != efepNO)
835 SET_CGINFO_FEP(cginfo[cgm+cg]);
836 *bFEP_NonBonded = TRUE;
838 /* Store the charge group size */
839 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
841 if (!bExclIntraAll || bExclInter)
843 *bExcl_IntraCGAll_InterCGNone = FALSE;
850 cg_offset += molb->nmol*cgs->nr;
851 a_offset += molb->nmol*cgs->index[cgs->nr];
855 /* the solvent optimizer is called after the QM is initialized,
856 * because we don't want to have the QM subsystemto become an
860 check_solvent(fplog, mtop, fr, cginfo_mb);
862 if (getenv("GMX_NO_SOLV_OPT"))
866 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
867 "Disabling all solvent optimization\n");
869 fr->solvent_opt = esolNO;
873 fr->solvent_opt = esolNO;
875 if (!fr->solvent_opt)
877 for (mb = 0; mb < mtop->nmolblock; mb++)
879 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
881 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
889 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
894 ncg = cgi_mb[nmb-1].cg_end;
897 for (cg = 0; cg < ncg; cg++)
899 while (cg >= cgi_mb[mb].cg_end)
904 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
910 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
912 /*This now calculates sum for q and c6*/
913 double qsum, q2sum, q, c6sum, c6;
915 const t_atoms *atoms;
920 for (mb = 0; mb < mtop->nmolblock; mb++)
922 nmol = mtop->molblock[mb].nmol;
923 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
924 for (i = 0; i < atoms->nr; i++)
926 q = atoms->atom[i].q;
929 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
934 fr->q2sum[0] = q2sum;
935 fr->c6sum[0] = c6sum;
937 if (fr->efep != efepNO)
942 for (mb = 0; mb < mtop->nmolblock; mb++)
944 nmol = mtop->molblock[mb].nmol;
945 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
946 for (i = 0; i < atoms->nr; i++)
948 q = atoms->atom[i].qB;
951 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
955 fr->q2sum[1] = q2sum;
956 fr->c6sum[1] = c6sum;
961 fr->qsum[1] = fr->qsum[0];
962 fr->q2sum[1] = fr->q2sum[0];
963 fr->c6sum[1] = fr->c6sum[0];
967 if (fr->efep == efepNO)
969 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
973 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
974 fr->qsum[0], fr->qsum[1]);
979 void update_forcerec(t_forcerec *fr, matrix box)
981 if (fr->eeltype == eelGRF)
983 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
984 fr->rcoulomb, fr->temp, fr->zsquare, box,
985 &fr->kappa, &fr->k_rf, &fr->c_rf);
989 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
991 const t_atoms *atoms, *atoms_tpi;
992 const t_blocka *excl;
993 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
994 gmx_int64_t npair, npair_ij, tmpi, tmpj;
995 double csix, ctwelve;
999 real *nbfp_comb = NULL;
1005 /* For LJ-PME, we want to correct for the difference between the
1006 * actual C6 values and the C6 values used by the LJ-PME based on
1007 * combination rules. */
1009 if (EVDW_PME(fr->vdwtype))
1011 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1012 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1013 for (tpi = 0; tpi < ntp; ++tpi)
1015 for (tpj = 0; tpj < ntp; ++tpj)
1017 C6(nbfp_comb, ntp, tpi, tpj) =
1018 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1019 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1024 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1032 /* Count the types so we avoid natoms^2 operations */
1033 snew(typecount, ntp);
1034 gmx_mtop_count_atomtypes(mtop, q, typecount);
1036 for (tpi = 0; tpi < ntp; tpi++)
1038 for (tpj = tpi; tpj < ntp; tpj++)
1040 tmpi = typecount[tpi];
1041 tmpj = typecount[tpj];
1044 npair_ij = tmpi*tmpj;
1048 npair_ij = tmpi*(tmpi - 1)/2;
1052 /* nbfp now includes the 6.0 derivative prefactor */
1053 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1057 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1058 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1059 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1065 /* Subtract the excluded pairs.
1066 * The main reason for substracting exclusions is that in some cases
1067 * some combinations might never occur and the parameters could have
1068 * any value. These unused values should not influence the dispersion
1071 for (mb = 0; mb < mtop->nmolblock; mb++)
1073 nmol = mtop->molblock[mb].nmol;
1074 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1075 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1076 for (i = 0; (i < atoms->nr); i++)
1080 tpi = atoms->atom[i].type;
1084 tpi = atoms->atom[i].typeB;
1086 j1 = excl->index[i];
1087 j2 = excl->index[i+1];
1088 for (j = j1; j < j2; j++)
1095 tpj = atoms->atom[k].type;
1099 tpj = atoms->atom[k].typeB;
1103 /* nbfp now includes the 6.0 derivative prefactor */
1104 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1108 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1109 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1110 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1120 /* Only correct for the interaction of the test particle
1121 * with the rest of the system.
1124 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1127 for (mb = 0; mb < mtop->nmolblock; mb++)
1129 nmol = mtop->molblock[mb].nmol;
1130 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1131 for (j = 0; j < atoms->nr; j++)
1134 /* Remove the interaction of the test charge group
1137 if (mb == mtop->nmolblock-1)
1141 if (mb == 0 && nmol == 1)
1143 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1148 tpj = atoms->atom[j].type;
1152 tpj = atoms->atom[j].typeB;
1154 for (i = 0; i < fr->n_tpi; i++)
1158 tpi = atoms_tpi->atom[i].type;
1162 tpi = atoms_tpi->atom[i].typeB;
1166 /* nbfp now includes the 6.0 derivative prefactor */
1167 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1171 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1172 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1173 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1180 if (npair - nexcl <= 0 && fplog)
1182 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1188 csix /= npair - nexcl;
1189 ctwelve /= npair - nexcl;
1193 fprintf(debug, "Counted %d exclusions\n", nexcl);
1194 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1195 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1197 fr->avcsix[q] = csix;
1198 fr->avctwelve[q] = ctwelve;
1201 if (EVDW_PME(fr->vdwtype))
1208 if (fr->eDispCorr == edispcAllEner ||
1209 fr->eDispCorr == edispcAllEnerPres)
1211 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1212 fr->avcsix[0], fr->avctwelve[0]);
1216 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1222 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1223 const gmx_mtop_t *mtop)
1225 const t_atoms *at1, *at2;
1226 int mt1, mt2, i, j, tpi, tpj, ntypes;
1232 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1239 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1241 at1 = &mtop->moltype[mt1].atoms;
1242 for (i = 0; (i < at1->nr); i++)
1244 tpi = at1->atom[i].type;
1247 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1250 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1252 at2 = &mtop->moltype[mt2].atoms;
1253 for (j = 0; (j < at2->nr); j++)
1255 tpj = at2->atom[j].type;
1258 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1260 b = BHAMB(nbfp, ntypes, tpi, tpj);
1261 if (b > fr->bham_b_max)
1265 if ((b < bmin) || (bmin == -1))
1275 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1276 bmin, fr->bham_b_max);
1280 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1281 t_forcerec *fr, real rtab,
1282 const t_commrec *cr,
1283 const char *tabfn, char *eg1, char *eg2,
1293 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1298 sprintf(buf, "%s", tabfn);
1301 /* Append the two energy group names */
1302 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1303 eg1, eg2, ftp2ext(efXVG));
1305 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1306 /* Copy the contents of the table to separate coulomb and LJ tables too,
1307 * to improve cache performance.
1309 /* For performance reasons we want
1310 * the table data to be aligned to 16-byte. The pointers could be freed
1311 * but currently aren't.
1313 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1314 nbl->table_elec.format = nbl->table_elec_vdw.format;
1315 nbl->table_elec.r = nbl->table_elec_vdw.r;
1316 nbl->table_elec.n = nbl->table_elec_vdw.n;
1317 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1318 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1319 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1320 nbl->table_elec.ninteractions = 1;
1321 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1322 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1324 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1325 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1326 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1327 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1328 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1329 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1330 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1331 nbl->table_vdw.ninteractions = 2;
1332 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1333 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1335 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1337 for (j = 0; j < 4; j++)
1339 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1341 for (j = 0; j < 8; j++)
1343 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1348 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1349 int *ncount, int **count)
1351 const gmx_moltype_t *molt;
1353 int mt, ftype, stride, i, j, tabnr;
1355 for (mt = 0; mt < mtop->nmoltype; mt++)
1357 molt = &mtop->moltype[mt];
1358 for (ftype = 0; ftype < F_NRE; ftype++)
1360 if (ftype == ftype1 || ftype == ftype2)
1362 il = &molt->ilist[ftype];
1363 stride = 1 + NRAL(ftype);
1364 for (i = 0; i < il->nr; i += stride)
1366 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1369 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1371 if (tabnr >= *ncount)
1373 srenew(*count, tabnr+1);
1374 for (j = *ncount; j < tabnr+1; j++)
1387 static bondedtable_t *make_bonded_tables(FILE *fplog,
1388 int ftype1, int ftype2,
1389 const gmx_mtop_t *mtop,
1390 const char *basefn, const char *tabext)
1392 int i, ncount, *count;
1400 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1405 for (i = 0; i < ncount; i++)
1409 sprintf(tabfn, "%s", basefn);
1410 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1411 tabext, i, ftp2ext(efXVG));
1412 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1421 void forcerec_set_ranges(t_forcerec *fr,
1422 int ncg_home, int ncg_force,
1424 int natoms_force_constr, int natoms_f_novirsum)
1429 /* fr->ncg_force is unused in the standard code,
1430 * but it can be useful for modified code dealing with charge groups.
1432 fr->ncg_force = ncg_force;
1433 fr->natoms_force = natoms_force;
1434 fr->natoms_force_constr = natoms_force_constr;
1436 if (fr->natoms_force_constr > fr->nalloc_force)
1438 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1442 srenew(fr->f_twin, fr->nalloc_force);
1446 if (fr->bF_NoVirSum)
1448 fr->f_novirsum_n = natoms_f_novirsum;
1449 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1451 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1452 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1457 fr->f_novirsum_n = 0;
1461 static real cutoff_inf(real cutoff)
1465 cutoff = GMX_CUTOFF_INF;
1471 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1472 t_forcerec *fr, const t_inputrec *ir,
1473 const char *tabfn, const gmx_mtop_t *mtop,
1481 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1485 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1487 sprintf(buf, "%s", tabfn);
1488 for (i = 0; i < ir->adress->n_tf_grps; i++)
1490 j = ir->adress->tf_table_index[i]; /* get energy group index */
1491 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1492 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1495 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1497 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1502 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1509 ir->rcoulomb == 0 &&
1511 ir->ePBC == epbcNONE &&
1512 ir->vdwtype == evdwCUT &&
1513 ir->coulombtype == eelCUT &&
1514 ir->efep == efepNO &&
1515 (ir->implicit_solvent == eisNO ||
1516 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1517 ir->gb_algorithm == egbHCT ||
1518 ir->gb_algorithm == egbOBC))) &&
1519 getenv("GMX_NO_ALLVSALL") == NULL
1522 if (bAllvsAll && ir->opts.ngener > 1)
1524 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1530 fprintf(stderr, "\n%s\n", note);
1534 fprintf(fp, "\n%s\n", note);
1540 if (bAllvsAll && fp && MASTER(cr))
1542 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1549 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1553 /* These thread local data structures are used for bondeds only */
1554 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1556 if (fr->nthreads > 1)
1558 snew(fr->f_t, fr->nthreads);
1559 /* Thread 0 uses the global force and energy arrays */
1560 for (t = 1; t < fr->nthreads; t++)
1562 fr->f_t[t].f = NULL;
1563 fr->f_t[t].f_nalloc = 0;
1564 snew(fr->f_t[t].fshift, SHIFTS);
1565 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1566 for (i = 0; i < egNR; i++)
1568 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1575 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1576 const t_commrec *cr,
1577 const t_inputrec *ir,
1580 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1582 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1583 bGPU ? "GPUs" : "SIMD kernels",
1584 bGPU ? "CPU only" : "plain-C kernels");
1592 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1596 *kernel_type = nbnxnk4x4_PlainC;
1597 *ewald_excl = ewaldexclTable;
1599 #ifdef GMX_NBNXN_SIMD
1601 #ifdef GMX_NBNXN_SIMD_4XN
1602 *kernel_type = nbnxnk4xN_SIMD_4xN;
1604 #ifdef GMX_NBNXN_SIMD_2XNN
1605 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1608 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1609 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1610 * Currently this is based on the SIMD acceleration choice,
1611 * but it might be better to decide this at runtime based on CPU.
1613 * 4xN calculates more (zero) interactions, but has less pair-search
1614 * work and much better kernel instruction scheduling.
1616 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1617 * which doesn't have FMA, both the analytical and tabulated Ewald
1618 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1619 * 2x(4+4) because it results in significantly fewer pairs.
1620 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1621 * 10% with HT, 50% without HT. As we currently don't detect the actual
1622 * use of HT, use 4x8 to avoid a potential performance hit.
1623 * On Intel Haswell 4x8 is always faster.
1625 *kernel_type = nbnxnk4xN_SIMD_4xN;
1627 #ifndef GMX_SIMD_HAVE_FMA
1628 if (EEL_PME_EWALD(ir->coulombtype) ||
1629 EVDW_PME(ir->vdwtype))
1631 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1632 * There are enough instructions to make 2x(4+4) efficient.
1634 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1637 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1640 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1642 #ifdef GMX_NBNXN_SIMD_4XN
1643 *kernel_type = nbnxnk4xN_SIMD_4xN;
1645 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1648 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1650 #ifdef GMX_NBNXN_SIMD_2XNN
1651 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1653 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1657 /* Analytical Ewald exclusion correction is only an option in
1659 * Since table lookup's don't parallelize with SIMD, analytical
1660 * will probably always be faster for a SIMD width of 8 or more.
1661 * With FMA analytical is sometimes faster for a width if 4 as well.
1662 * On BlueGene/Q, this is faster regardless of precision.
1663 * In single precision, this is faster on Bulldozer.
1665 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1666 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1667 defined GMX_SIMD_IBM_QPX
1668 *ewald_excl = ewaldexclAnalytical;
1670 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1672 *ewald_excl = ewaldexclTable;
1674 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1676 *ewald_excl = ewaldexclAnalytical;
1680 #endif /* GMX_NBNXN_SIMD */
1684 const char *lookup_nbnxn_kernel_name(int kernel_type)
1686 const char *returnvalue = NULL;
1687 switch (kernel_type)
1690 returnvalue = "not set";
1692 case nbnxnk4x4_PlainC:
1693 returnvalue = "plain C";
1695 case nbnxnk4xN_SIMD_4xN:
1696 case nbnxnk4xN_SIMD_2xNN:
1697 #ifdef GMX_NBNXN_SIMD
1698 #if defined GMX_SIMD_X86_SSE2
1699 returnvalue = "SSE2";
1700 #elif defined GMX_SIMD_X86_SSE4_1
1701 returnvalue = "SSE4.1";
1702 #elif defined GMX_SIMD_X86_AVX_128_FMA
1703 returnvalue = "AVX_128_FMA";
1704 #elif defined GMX_SIMD_X86_AVX_256
1705 returnvalue = "AVX_256";
1706 #elif defined GMX_SIMD_X86_AVX2_256
1707 returnvalue = "AVX2_256";
1709 returnvalue = "SIMD";
1711 #else /* GMX_NBNXN_SIMD */
1712 returnvalue = "not available";
1713 #endif /* GMX_NBNXN_SIMD */
1715 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1716 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1720 gmx_fatal(FARGS, "Illegal kernel type selected");
1727 static void pick_nbnxn_kernel(FILE *fp,
1728 const t_commrec *cr,
1729 gmx_bool use_simd_kernels,
1731 gmx_bool bEmulateGPU,
1732 const t_inputrec *ir,
1735 gmx_bool bDoNonbonded)
1737 assert(kernel_type);
1739 *kernel_type = nbnxnkNotSet;
1740 *ewald_excl = ewaldexclTable;
1744 *kernel_type = nbnxnk8x8x8_PlainC;
1748 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1753 *kernel_type = nbnxnk8x8x8_CUDA;
1756 if (*kernel_type == nbnxnkNotSet)
1758 /* LJ PME with LB combination rule does 7 mesh operations.
1759 * This so slow that we don't compile SIMD non-bonded kernels for that.
1761 if (use_simd_kernels &&
1762 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1764 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1768 *kernel_type = nbnxnk4x4_PlainC;
1772 if (bDoNonbonded && fp != NULL)
1774 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1775 lookup_nbnxn_kernel_name(*kernel_type),
1776 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1777 nbnxn_kernel_to_cj_size(*kernel_type));
1779 if (nbnxnk4x4_PlainC == *kernel_type ||
1780 nbnxnk8x8x8_PlainC == *kernel_type)
1782 md_print_warn(cr, fp,
1783 "WARNING: Using the slow %s kernels. This should\n"
1784 "not happen during routine usage on supported platforms.\n\n",
1785 lookup_nbnxn_kernel_name(*kernel_type));
1790 static void pick_nbnxn_resources(const t_commrec *cr,
1791 const gmx_hw_info_t *hwinfo,
1792 gmx_bool bDoNonbonded,
1794 gmx_bool *bEmulateGPU,
1795 const gmx_gpu_opt_t *gpu_opt)
1797 gmx_bool bEmulateGPUEnvVarSet;
1798 char gpu_err_str[STRLEN];
1802 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1804 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1805 * GPUs (currently) only handle non-bonded calculations, we will
1806 * automatically switch to emulation if non-bonded calculations are
1807 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1808 * way to turn off GPU initialization, data movement, and cleanup.
1810 * GPU emulation can be useful to assess the performance one can expect by
1811 * adding GPU(s) to the machine. The conditional below allows this even
1812 * if mdrun is compiled without GPU acceleration support.
1813 * Note that you should freezing the system as otherwise it will explode.
1815 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1817 gpu_opt->ncuda_dev_use > 0));
1819 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1821 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1823 /* Each PP node will use the intra-node id-th device from the
1824 * list of detected/selected GPUs. */
1825 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1826 &hwinfo->gpu_info, gpu_opt))
1828 /* At this point the init should never fail as we made sure that
1829 * we have all the GPUs we need. If it still does, we'll bail. */
1830 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1832 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1833 cr->rank_pp_intranode),
1837 /* Here we actually turn on hardware GPU acceleration */
1842 gmx_bool uses_simple_tables(int cutoff_scheme,
1843 nonbonded_verlet_t *nbv,
1846 gmx_bool bUsesSimpleTables = TRUE;
1849 switch (cutoff_scheme)
1852 bUsesSimpleTables = TRUE;
1855 assert(NULL != nbv && NULL != nbv->grp);
1856 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1857 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1860 gmx_incons("unimplemented");
1862 return bUsesSimpleTables;
1865 static void init_ewald_f_table(interaction_const_t *ic,
1866 gmx_bool bUsesSimpleTables,
1871 if (bUsesSimpleTables)
1873 /* Get the Ewald table spacing based on Coulomb and/or LJ
1874 * Ewald coefficients and rtol.
1876 ic->tabq_scale = ewald_spline3_table_scale(ic);
1878 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1879 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1883 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1884 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1885 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1888 sfree_aligned(ic->tabq_coul_FDV0);
1889 sfree_aligned(ic->tabq_coul_F);
1890 sfree_aligned(ic->tabq_coul_V);
1892 sfree_aligned(ic->tabq_vdw_FDV0);
1893 sfree_aligned(ic->tabq_vdw_F);
1894 sfree_aligned(ic->tabq_vdw_V);
1896 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1898 /* Create the original table data in FDV0 */
1899 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1900 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1901 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1902 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1903 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1906 if (EVDW_PME(ic->vdwtype))
1908 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1909 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1910 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1911 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1912 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1916 void init_interaction_const_tables(FILE *fp,
1917 interaction_const_t *ic,
1918 gmx_bool bUsesSimpleTables,
1923 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1925 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1929 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1930 1/ic->tabq_scale, ic->tabq_size);
1935 static void clear_force_switch_constants(shift_consts_t *sc)
1942 static void force_switch_constants(real p,
1946 /* Here we determine the coefficient for shifting the force to zero
1947 * between distance rsw and the cut-off rc.
1948 * For a potential of r^-p, we have force p*r^-(p+1).
1949 * But to save flops we absorb p in the coefficient.
1951 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1952 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1954 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1955 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1956 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1959 static void potential_switch_constants(real rsw, real rc,
1960 switch_consts_t *sc)
1962 /* The switch function is 1 at rsw and 0 at rc.
1963 * The derivative and second derivate are zero at both ends.
1964 * rsw = max(r - r_switch, 0)
1965 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1966 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1967 * force = force*dsw - potential*sw
1970 sc->c3 = -10*pow(rc - rsw, -3);
1971 sc->c4 = 15*pow(rc - rsw, -4);
1972 sc->c5 = -6*pow(rc - rsw, -5);
1976 init_interaction_const(FILE *fp,
1977 const t_commrec gmx_unused *cr,
1978 interaction_const_t **interaction_const,
1979 const t_forcerec *fr,
1982 interaction_const_t *ic;
1983 gmx_bool bUsesSimpleTables = TRUE;
1987 /* Just allocate something so we can free it */
1988 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1989 snew_aligned(ic->tabq_coul_F, 16, 32);
1990 snew_aligned(ic->tabq_coul_V, 16, 32);
1992 ic->rlist = fr->rlist;
1993 ic->rlistlong = fr->rlistlong;
1996 ic->vdwtype = fr->vdwtype;
1997 ic->vdw_modifier = fr->vdw_modifier;
1998 ic->rvdw = fr->rvdw;
1999 ic->rvdw_switch = fr->rvdw_switch;
2000 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
2001 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2002 ic->sh_lj_ewald = 0;
2003 clear_force_switch_constants(&ic->dispersion_shift);
2004 clear_force_switch_constants(&ic->repulsion_shift);
2006 switch (ic->vdw_modifier)
2008 case eintmodPOTSHIFT:
2009 /* Only shift the potential, don't touch the force */
2010 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2011 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2012 if (EVDW_PME(ic->vdwtype))
2016 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2017 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2020 case eintmodFORCESWITCH:
2021 /* Switch the force, switch and shift the potential */
2022 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2023 &ic->dispersion_shift);
2024 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2025 &ic->repulsion_shift);
2027 case eintmodPOTSWITCH:
2028 /* Switch the potential and force */
2029 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2033 case eintmodEXACTCUTOFF:
2034 /* Nothing to do here */
2037 gmx_incons("unimplemented potential modifier");
2040 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2042 /* Electrostatics */
2043 ic->eeltype = fr->eeltype;
2044 ic->coulomb_modifier = fr->coulomb_modifier;
2045 ic->rcoulomb = fr->rcoulomb;
2046 ic->epsilon_r = fr->epsilon_r;
2047 ic->epsfac = fr->epsfac;
2048 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2050 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2052 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2059 /* Reaction-field */
2060 if (EEL_RF(ic->eeltype))
2062 ic->epsilon_rf = fr->epsilon_rf;
2063 ic->k_rf = fr->k_rf;
2064 ic->c_rf = fr->c_rf;
2068 /* For plain cut-off we might use the reaction-field kernels */
2069 ic->epsilon_rf = ic->epsilon_r;
2071 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2073 ic->c_rf = 1/ic->rcoulomb;
2083 real dispersion_shift;
2085 dispersion_shift = ic->dispersion_shift.cpot;
2086 if (EVDW_PME(ic->vdwtype))
2088 dispersion_shift -= ic->sh_lj_ewald;
2090 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2091 ic->repulsion_shift.cpot, dispersion_shift);
2093 if (ic->eeltype == eelCUT)
2095 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2097 else if (EEL_PME(ic->eeltype))
2099 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2104 *interaction_const = ic;
2106 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2108 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2110 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2111 * also sharing texture references. To keep the code simple, we don't
2112 * treat texture references as shared resources, but this means that
2113 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2114 * Hence, to ensure that the non-bonded kernels don't start before all
2115 * texture binding operations are finished, we need to wait for all ranks
2116 * to arrive here before continuing.
2118 * Note that we could omit this barrier if GPUs are not shared (or
2119 * texture objects are used), but as this is initialization code, there
2120 * is not point in complicating things.
2122 #ifdef GMX_THREAD_MPI
2127 #endif /* GMX_THREAD_MPI */
2130 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2131 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2134 static void init_nb_verlet(FILE *fp,
2135 nonbonded_verlet_t **nb_verlet,
2136 gmx_bool bFEP_NonBonded,
2137 const t_inputrec *ir,
2138 const t_forcerec *fr,
2139 const t_commrec *cr,
2140 const char *nbpu_opt)
2142 nonbonded_verlet_t *nbv;
2145 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2147 nbnxn_alloc_t *nb_alloc;
2148 nbnxn_free_t *nb_free;
2152 pick_nbnxn_resources(cr, fr->hwinfo,
2160 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2161 for (i = 0; i < nbv->ngrp; i++)
2163 nbv->grp[i].nbl_lists.nnbl = 0;
2164 nbv->grp[i].nbat = NULL;
2165 nbv->grp[i].kernel_type = nbnxnkNotSet;
2167 if (i == 0) /* local */
2169 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2170 nbv->bUseGPU, bEmulateGPU, ir,
2171 &nbv->grp[i].kernel_type,
2172 &nbv->grp[i].ewald_excl,
2175 else /* non-local */
2177 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2179 /* Use GPU for local, select a CPU kernel for non-local */
2180 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2182 &nbv->grp[i].kernel_type,
2183 &nbv->grp[i].ewald_excl,
2186 bHybridGPURun = TRUE;
2190 /* Use the same kernel for local and non-local interactions */
2191 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2192 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2199 /* init the NxN GPU data; the last argument tells whether we'll have
2200 * both local and non-local NB calculation on GPU */
2201 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2202 &fr->hwinfo->gpu_info, fr->gpu_opt,
2203 cr->rank_pp_intranode,
2204 (nbv->ngrp > 1) && !bHybridGPURun);
2206 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2210 nbv->min_ci_balanced = strtol(env, &end, 10);
2211 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2213 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2218 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2219 nbv->min_ci_balanced);
2224 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2227 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2228 nbv->min_ci_balanced);
2234 nbv->min_ci_balanced = 0;
2239 nbnxn_init_search(&nbv->nbs,
2240 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2241 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2243 gmx_omp_nthreads_get(emntPairsearch));
2245 for (i = 0; i < nbv->ngrp; i++)
2247 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2249 nb_alloc = &pmalloc;
2258 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2259 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2260 /* 8x8x8 "non-simple" lists are ATM always combined */
2261 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2265 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2267 gmx_bool bSimpleList;
2268 int enbnxninitcombrule;
2270 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2272 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2274 /* Plain LJ cut-off: we can optimize with combination rules */
2275 enbnxninitcombrule = enbnxninitcombruleDETECT;
2277 else if (fr->vdwtype == evdwPME)
2279 /* LJ-PME: we need to use a combination rule for the grid */
2280 if (fr->ljpme_combination_rule == eljpmeGEOM)
2282 enbnxninitcombrule = enbnxninitcombruleGEOM;
2286 enbnxninitcombrule = enbnxninitcombruleLB;
2291 /* We use a full combination matrix: no rule required */
2292 enbnxninitcombrule = enbnxninitcombruleNONE;
2296 snew(nbv->grp[i].nbat, 1);
2297 nbnxn_atomdata_init(fp,
2299 nbv->grp[i].kernel_type,
2301 fr->ntype, fr->nbfp,
2303 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2308 nbv->grp[i].nbat = nbv->grp[0].nbat;
2313 void init_forcerec(FILE *fp,
2314 const output_env_t oenv,
2317 const t_inputrec *ir,
2318 const gmx_mtop_t *mtop,
2319 const t_commrec *cr,
2325 const char *nbpu_opt,
2326 gmx_bool bNoSolvOpt,
2329 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2334 gmx_bool bGenericKernelOnly;
2335 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2336 gmx_bool bFEP_NonBonded;
2338 int *nm_ind, egp_flags;
2340 if (fr->hwinfo == NULL)
2342 /* Detect hardware, gather information.
2343 * In mdrun, hwinfo has already been set before calling init_forcerec.
2344 * Here we ignore GPUs, as tools will not use them anyhow.
2346 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2349 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2350 fr->use_simd_kernels = TRUE;
2352 fr->bDomDec = DOMAINDECOMP(cr);
2354 natoms = mtop->natoms;
2356 if (check_box(ir->ePBC, box))
2358 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2361 /* Test particle insertion ? */
2364 /* Set to the size of the molecule to be inserted (the last one) */
2365 /* Because of old style topologies, we have to use the last cg
2366 * instead of the last molecule type.
2368 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2369 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2370 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2372 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2380 /* Copy AdResS parameters */
2383 fr->adress_type = ir->adress->type;
2384 fr->adress_const_wf = ir->adress->const_wf;
2385 fr->adress_ex_width = ir->adress->ex_width;
2386 fr->adress_hy_width = ir->adress->hy_width;
2387 fr->adress_icor = ir->adress->icor;
2388 fr->adress_site = ir->adress->site;
2389 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2390 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2393 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2394 for (i = 0; i < ir->adress->n_energy_grps; i++)
2396 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2399 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2400 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2401 for (i = 0; i < fr->n_adress_tf_grps; i++)
2403 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2405 copy_rvec(ir->adress->refs, fr->adress_refs);
2409 fr->adress_type = eAdressOff;
2410 fr->adress_do_hybridpairs = FALSE;
2413 /* Copy the user determined parameters */
2414 fr->userint1 = ir->userint1;
2415 fr->userint2 = ir->userint2;
2416 fr->userint3 = ir->userint3;
2417 fr->userint4 = ir->userint4;
2418 fr->userreal1 = ir->userreal1;
2419 fr->userreal2 = ir->userreal2;
2420 fr->userreal3 = ir->userreal3;
2421 fr->userreal4 = ir->userreal4;
2424 fr->fc_stepsize = ir->fc_stepsize;
2427 fr->efep = ir->efep;
2428 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2429 if (ir->fepvals->bScCoul)
2431 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2432 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2436 fr->sc_alphacoul = 0;
2437 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2439 fr->sc_power = ir->fepvals->sc_power;
2440 fr->sc_r_power = ir->fepvals->sc_r_power;
2441 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2443 env = getenv("GMX_SCSIGMA_MIN");
2447 sscanf(env, "%lf", &dbl);
2448 fr->sc_sigma6_min = pow(dbl, 6);
2451 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2455 fr->bNonbonded = TRUE;
2456 if (getenv("GMX_NO_NONBONDED") != NULL)
2458 /* turn off non-bonded calculations */
2459 fr->bNonbonded = FALSE;
2460 md_print_warn(cr, fp,
2461 "Found environment variable GMX_NO_NONBONDED.\n"
2462 "Disabling nonbonded calculations.\n");
2465 bGenericKernelOnly = FALSE;
2467 /* We now check in the NS code whether a particular combination of interactions
2468 * can be used with water optimization, and disable it if that is not the case.
2471 if (getenv("GMX_NB_GENERIC") != NULL)
2476 "Found environment variable GMX_NB_GENERIC.\n"
2477 "Disabling all interaction-specific nonbonded kernels, will only\n"
2478 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2480 bGenericKernelOnly = TRUE;
2483 if (bGenericKernelOnly == TRUE)
2488 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2490 fr->use_simd_kernels = FALSE;
2494 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2495 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2499 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2501 /* Check if we can/should do all-vs-all kernels */
2502 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2503 fr->AllvsAll_work = NULL;
2504 fr->AllvsAll_workgb = NULL;
2506 /* All-vs-all kernels have not been implemented in 4.6, and
2507 * the SIMD group kernels are also buggy in this case. Non-SIMD
2508 * group kernels are OK. See Redmine #1249. */
2511 fr->bAllvsAll = FALSE;
2512 fr->use_simd_kernels = FALSE;
2516 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2517 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2518 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2519 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2520 "we are proceeding by disabling all CPU architecture-specific\n"
2521 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2522 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2526 /* Neighbour searching stuff */
2527 fr->cutoff_scheme = ir->cutoff_scheme;
2528 fr->bGrid = (ir->ns_type == ensGRID);
2529 fr->ePBC = ir->ePBC;
2531 if (fr->cutoff_scheme == ecutsGROUP)
2533 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2534 "removed in a future release when 'verlet' supports all interaction forms.\n";
2538 fprintf(stderr, "\n%s\n", note);
2542 fprintf(fp, "\n%s\n", note);
2546 /* Determine if we will do PBC for distances in bonded interactions */
2547 if (fr->ePBC == epbcNONE)
2549 fr->bMolPBC = FALSE;
2553 if (!DOMAINDECOMP(cr))
2555 /* The group cut-off scheme and SHAKE assume charge groups
2556 * are whole, but not using molpbc is faster in most cases.
2558 if (fr->cutoff_scheme == ecutsGROUP ||
2559 (ir->eConstrAlg == econtSHAKE &&
2560 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2561 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2563 fr->bMolPBC = ir->bPeriodicMols;
2568 if (getenv("GMX_USE_GRAPH") != NULL)
2570 fr->bMolPBC = FALSE;
2573 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2580 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2583 fr->bGB = (ir->implicit_solvent == eisGBSA);
2585 fr->rc_scaling = ir->refcoord_scaling;
2586 copy_rvec(ir->posres_com, fr->posres_com);
2587 copy_rvec(ir->posres_comB, fr->posres_comB);
2588 fr->rlist = cutoff_inf(ir->rlist);
2589 fr->rlistlong = cutoff_inf(ir->rlistlong);
2590 fr->eeltype = ir->coulombtype;
2591 fr->vdwtype = ir->vdwtype;
2592 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2594 fr->coulomb_modifier = ir->coulomb_modifier;
2595 fr->vdw_modifier = ir->vdw_modifier;
2597 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2598 switch (fr->eeltype)
2601 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2607 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2611 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2612 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2621 case eelPMEUSERSWITCH:
2622 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2627 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2631 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2635 /* Vdw: Translate from mdp settings to kernel format */
2636 switch (fr->vdwtype)
2641 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2645 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2649 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2655 case evdwENCADSHIFT:
2656 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2660 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2664 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2665 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2666 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2668 fr->rvdw = cutoff_inf(ir->rvdw);
2669 fr->rvdw_switch = ir->rvdw_switch;
2670 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2671 fr->rcoulomb_switch = ir->rcoulomb_switch;
2673 fr->bTwinRange = fr->rlistlong > fr->rlist;
2674 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2676 fr->reppow = mtop->ffparams.reppow;
2678 if (ir->cutoff_scheme == ecutsGROUP)
2680 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2681 && !EVDW_PME(fr->vdwtype));
2682 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2683 fr->bcoultab = !(fr->eeltype == eelCUT ||
2684 fr->eeltype == eelEWALD ||
2685 fr->eeltype == eelPME ||
2686 fr->eeltype == eelRF ||
2687 fr->eeltype == eelRF_ZERO);
2689 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2690 * going to be faster to tabulate the interaction than calling the generic kernel.
2691 * However, if generic kernels have been requested we keep things analytically.
2693 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2694 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2695 bGenericKernelOnly == FALSE)
2697 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2699 fr->bcoultab = TRUE;
2700 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2701 * which would otherwise need two tables.
2705 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2706 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2707 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2708 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2710 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2712 fr->bcoultab = TRUE;
2716 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2718 fr->bcoultab = TRUE;
2720 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2725 if (getenv("GMX_REQUIRE_TABLES"))
2728 fr->bcoultab = TRUE;
2733 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2734 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2737 if (fr->bvdwtab == TRUE)
2739 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2740 fr->nbkernel_vdw_modifier = eintmodNONE;
2742 if (fr->bcoultab == TRUE)
2744 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2745 fr->nbkernel_elec_modifier = eintmodNONE;
2749 if (ir->cutoff_scheme == ecutsVERLET)
2751 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2753 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2755 fr->bvdwtab = FALSE;
2756 fr->bcoultab = FALSE;
2759 /* Tables are used for direct ewald sum */
2762 if (EEL_PME(ir->coulombtype))
2766 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2768 if (ir->coulombtype == eelP3M_AD)
2770 please_cite(fp, "Hockney1988");
2771 please_cite(fp, "Ballenegger2012");
2775 please_cite(fp, "Essmann95a");
2778 if (ir->ewald_geometry == eewg3DC)
2782 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2784 please_cite(fp, "In-Chul99a");
2787 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2788 init_ewald_tab(&(fr->ewald_table), ir, fp);
2791 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2792 1/fr->ewaldcoeff_q);
2796 if (EVDW_PME(ir->vdwtype))
2800 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2802 please_cite(fp, "Essmann95a");
2803 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2806 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2807 1/fr->ewaldcoeff_lj);
2811 /* Electrostatics */
2812 fr->epsilon_r = ir->epsilon_r;
2813 fr->epsilon_rf = ir->epsilon_rf;
2814 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2816 /* Parameters for generalized RF */
2820 if (fr->eeltype == eelGRF)
2822 init_generalized_rf(fp, mtop, ir, fr);
2825 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2826 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2827 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2828 IR_ELEC_FIELD(*ir) ||
2829 (fr->adress_icor != eAdressICOff)
2832 if (fr->cutoff_scheme == ecutsGROUP &&
2833 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2835 /* Count the total number of charge groups */
2836 fr->cg_nalloc = ncg_mtop(mtop);
2837 srenew(fr->cg_cm, fr->cg_nalloc);
2839 if (fr->shift_vec == NULL)
2841 snew(fr->shift_vec, SHIFTS);
2844 if (fr->fshift == NULL)
2846 snew(fr->fshift, SHIFTS);
2849 if (fr->nbfp == NULL)
2851 fr->ntype = mtop->ffparams.atnr;
2852 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2853 if (EVDW_PME(fr->vdwtype))
2855 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2859 /* Copy the energy group exclusions */
2860 fr->egp_flags = ir->opts.egp_flags;
2862 /* Van der Waals stuff */
2863 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2865 if (fr->rvdw_switch >= fr->rvdw)
2867 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2868 fr->rvdw_switch, fr->rvdw);
2872 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2873 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2874 fr->rvdw_switch, fr->rvdw);
2878 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2880 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2883 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2885 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2888 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2890 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2895 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2896 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2899 fr->eDispCorr = ir->eDispCorr;
2900 if (ir->eDispCorr != edispcNO)
2902 set_avcsixtwelve(fp, fr, mtop);
2907 set_bham_b_max(fp, fr, mtop);
2910 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2912 /* Copy the GBSA data (radius, volume and surftens for each
2913 * atomtype) from the topology atomtype section to forcerec.
2915 snew(fr->atype_radius, fr->ntype);
2916 snew(fr->atype_vol, fr->ntype);
2917 snew(fr->atype_surftens, fr->ntype);
2918 snew(fr->atype_gb_radius, fr->ntype);
2919 snew(fr->atype_S_hct, fr->ntype);
2921 if (mtop->atomtypes.nr > 0)
2923 for (i = 0; i < fr->ntype; i++)
2925 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2927 for (i = 0; i < fr->ntype; i++)
2929 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2931 for (i = 0; i < fr->ntype; i++)
2933 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2935 for (i = 0; i < fr->ntype; i++)
2937 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2939 for (i = 0; i < fr->ntype; i++)
2941 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2945 /* Generate the GB table if needed */
2949 fr->gbtabscale = 2000;
2951 fr->gbtabscale = 500;
2955 fr->gbtab = make_gb_table(oenv, fr);
2957 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2959 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2960 if (!DOMAINDECOMP(cr))
2962 make_local_gb(cr, fr->born, ir->gb_algorithm);
2966 /* Set the charge scaling */
2967 if (fr->epsilon_r != 0)
2969 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2973 /* eps = 0 is infinite dieletric: no coulomb interactions */
2977 /* Reaction field constants */
2978 if (EEL_RF(fr->eeltype))
2980 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2981 fr->rcoulomb, fr->temp, fr->zsquare, box,
2982 &fr->kappa, &fr->k_rf, &fr->c_rf);
2985 /*This now calculates sum for q and c6*/
2986 set_chargesum(fp, fr, mtop);
2988 /* if we are using LR electrostatics, and they are tabulated,
2989 * the tables will contain modified coulomb interactions.
2990 * Since we want to use the non-shifted ones for 1-4
2991 * coulombic interactions, we must have an extra set of tables.
2994 /* Construct tables.
2995 * A little unnecessary to make both vdw and coul tables sometimes,
2996 * but what the heck... */
2998 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2999 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
3001 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
3002 fr->coulomb_modifier != eintmodNONE ||
3003 fr->vdw_modifier != eintmodNONE ||
3004 fr->bBHAM || fr->bEwald) &&
3005 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3006 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3007 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3009 negp_pp = ir->opts.ngener - ir->nwall;
3013 bSomeNormalNbListsAreInUse = TRUE;
3018 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3019 for (egi = 0; egi < negp_pp; egi++)
3021 for (egj = egi; egj < negp_pp; egj++)
3023 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3024 if (!(egp_flags & EGP_EXCL))
3026 if (egp_flags & EGP_TABLE)
3032 bSomeNormalNbListsAreInUse = TRUE;
3037 if (bSomeNormalNbListsAreInUse)
3039 fr->nnblists = negptable + 1;
3043 fr->nnblists = negptable;
3045 if (fr->nnblists > 1)
3047 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3056 snew(fr->nblists, fr->nnblists);
3058 /* This code automatically gives table length tabext without cut-off's,
3059 * in that case grompp should already have checked that we do not need
3060 * normal tables and we only generate tables for 1-4 interactions.
3062 rtab = ir->rlistlong + ir->tabext;
3066 /* make tables for ordinary interactions */
3067 if (bSomeNormalNbListsAreInUse)
3069 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3072 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3074 if (!bMakeSeparate14Table)
3076 fr->tab14 = fr->nblists[0].table_elec_vdw;
3086 /* Read the special tables for certain energy group pairs */
3087 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3088 for (egi = 0; egi < negp_pp; egi++)
3090 for (egj = egi; egj < negp_pp; egj++)
3092 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3093 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3095 nbl = &(fr->nblists[m]);
3096 if (fr->nnblists > 1)
3098 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3100 /* Read the table file with the two energy groups names appended */
3101 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3102 *mtop->groups.grpname[nm_ind[egi]],
3103 *mtop->groups.grpname[nm_ind[egj]],
3107 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3108 *mtop->groups.grpname[nm_ind[egi]],
3109 *mtop->groups.grpname[nm_ind[egj]],
3110 &fr->nblists[fr->nnblists/2+m]);
3114 else if (fr->nnblists > 1)
3116 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3122 else if ((fr->eDispCorr != edispcNO) &&
3123 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3124 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3125 (fr->vdw_modifier == eintmodPOTSHIFT)))
3127 /* Tables might not be used for the potential modifier interactions per se, but
3128 * we still need them to evaluate switch/shift dispersion corrections in this case.
3130 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3133 if (bMakeSeparate14Table)
3135 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3136 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3137 GMX_MAKETABLES_14ONLY);
3140 /* Read AdResS Thermo Force table if needed */
3141 if (fr->adress_icor == eAdressICThermoForce)
3143 /* old todo replace */
3145 if (ir->adress->n_tf_grps > 0)
3147 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3152 /* load the default table */
3153 snew(fr->atf_tabs, 1);
3154 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3159 fr->nwall = ir->nwall;
3160 if (ir->nwall && ir->wall_type == ewtTABLE)
3162 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3167 fcd->bondtab = make_bonded_tables(fp,
3168 F_TABBONDS, F_TABBONDSNC,
3170 fcd->angletab = make_bonded_tables(fp,
3173 fcd->dihtab = make_bonded_tables(fp,
3181 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3185 /* QM/MM initialization if requested
3189 fprintf(stderr, "QM/MM calculation requested.\n");
3192 fr->bQMMM = ir->bQMMM;
3193 fr->qr = mk_QMMMrec();
3195 /* Set all the static charge group info */
3196 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3198 &fr->bExcl_IntraCGAll_InterCGNone);
3199 if (DOMAINDECOMP(cr))
3205 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3208 if (!DOMAINDECOMP(cr))
3210 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3211 mtop->natoms, mtop->natoms, mtop->natoms);
3214 fr->print_force = print_force;
3217 /* coarse load balancing vars */
3222 /* Initialize neighbor search */
3223 init_ns(fp, cr, &fr->ns, fr, mtop);
3225 if (cr->duty & DUTY_PP)
3227 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3231 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3236 /* Initialize the thread working data for bonded interactions */
3237 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3239 snew(fr->excl_load, fr->nthreads+1);
3241 if (fr->cutoff_scheme == ecutsVERLET)
3243 if (ir->rcoulomb != ir->rvdw)
3245 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3248 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3251 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3252 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3254 if (ir->eDispCorr != edispcNO)
3256 calc_enervirdiff(fp, ir->eDispCorr, fr);
3260 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3261 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3262 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3264 void pr_forcerec(FILE *fp, t_forcerec *fr)
3268 pr_real(fp, fr->rlist);
3269 pr_real(fp, fr->rcoulomb);
3270 pr_real(fp, fr->fudgeQQ);
3271 pr_bool(fp, fr->bGrid);
3272 pr_bool(fp, fr->bTwinRange);
3273 /*pr_int(fp,fr->cg0);
3274 pr_int(fp,fr->hcg);*/
3275 for (i = 0; i < fr->nnblists; i++)
3277 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3279 pr_real(fp, fr->rcoulomb_switch);
3280 pr_real(fp, fr->rcoulomb);
3285 void forcerec_set_excl_load(t_forcerec *fr,
3286 const gmx_localtop_t *top)
3289 int t, i, j, ntot, n, ntarget;
3291 ind = top->excls.index;
3295 for (i = 0; i < top->excls.nr; i++)
3297 for (j = ind[i]; j < ind[i+1]; j++)
3306 fr->excl_load[0] = 0;
3309 for (t = 1; t <= fr->nthreads; t++)
3311 ntarget = (ntot*t)/fr->nthreads;
3312 while (i < top->excls.nr && n < ntarget)
3314 for (j = ind[i]; j < ind[i+1]; j++)
3323 fr->excl_load[t] = i;