2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "types/commrec.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/math/utilities.h"
49 #include "gromacs/utility/smalloc.h"
51 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/math/units.h"
55 #include "nonbonded.h"
63 #include "md_support.h"
64 #include "md_logging.h"
68 #include "mtop_util.h"
69 #include "nbnxn_simd.h"
70 #include "nbnxn_search.h"
71 #include "nbnxn_atomdata.h"
72 #include "nbnxn_consts.h"
73 #include "gmx_omp_nthreads.h"
74 #include "gmx_detect_hardware.h"
77 #include "types/nbnxn_cuda_types_ext.h"
78 #include "gpu_utils.h"
79 #include "nbnxn_cuda_data_mgmt.h"
80 #include "pmalloc_cuda.h"
82 t_forcerec *mk_forcerec(void)
92 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
96 for (i = 0; (i < atnr); i++)
98 for (j = 0; (j < atnr); j++)
100 fprintf(fp, "%2d - %2d", i, j);
103 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
104 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
108 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
109 C12(nbfp, atnr, i, j)/12.0);
116 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
124 snew(nbfp, 3*atnr*atnr);
125 for (i = k = 0; (i < atnr); i++)
127 for (j = 0; (j < atnr); j++, k++)
129 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
130 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
131 /* nbfp now includes the 6.0 derivative prefactor */
132 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
138 snew(nbfp, 2*atnr*atnr);
139 for (i = k = 0; (i < atnr); i++)
141 for (j = 0; (j < atnr); j++, k++)
143 /* nbfp now includes the 6.0/12.0 derivative prefactors */
144 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
145 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
153 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
156 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
159 /* For LJ-PME simulations, we correct the energies with the reciprocal space
160 * inside of the cut-off. To do this the non-bonded kernels needs to have
161 * access to the C6-values used on the reciprocal grid in pme.c
165 snew(grid, 2*atnr*atnr);
166 for (i = k = 0; (i < atnr); i++)
168 for (j = 0; (j < atnr); j++, k++)
170 c6i = idef->iparams[i*(atnr+1)].lj.c6;
171 c12i = idef->iparams[i*(atnr+1)].lj.c12;
172 c6j = idef->iparams[j*(atnr+1)].lj.c6;
173 c12j = idef->iparams[j*(atnr+1)].lj.c12;
174 c6 = sqrt(c6i * c6j);
175 if (fr->ljpme_combination_rule == eljpmeLB
176 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
178 sigmai = pow(c12i / c6i, 1.0/6.0);
179 sigmaj = pow(c12j / c6j, 1.0/6.0);
180 epsi = c6i * c6i / c12i;
181 epsj = c6j * c6j / c12j;
182 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
184 /* Store the elements at the same relative positions as C6 in nbfp in order
185 * to simplify access in the kernels
187 grid[2*(atnr*i+j)] = c6*6.0;
193 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
197 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
201 snew(nbfp, 2*atnr*atnr);
202 for (i = 0; i < atnr; ++i)
204 for (j = 0; j < atnr; ++j)
206 c6i = idef->iparams[i*(atnr+1)].lj.c6;
207 c12i = idef->iparams[i*(atnr+1)].lj.c12;
208 c6j = idef->iparams[j*(atnr+1)].lj.c6;
209 c12j = idef->iparams[j*(atnr+1)].lj.c12;
210 c6 = sqrt(c6i * c6j);
211 c12 = sqrt(c12i * c12j);
212 if (comb_rule == eCOMB_ARITHMETIC
213 && !gmx_numzero(c6) && !gmx_numzero(c12))
215 sigmai = pow(c12i / c6i, 1.0/6.0);
216 sigmaj = pow(c12j / c6j, 1.0/6.0);
217 epsi = c6i * c6i / c12i;
218 epsj = c6j * c6j / c12j;
219 c6 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
220 c12 = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
222 C6(nbfp, atnr, i, j) = c6*6.0;
223 C12(nbfp, atnr, i, j) = c12*12.0;
229 /* This routine sets fr->solvent_opt to the most common solvent in the
230 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
231 * the fr->solvent_type array with the correct type (or esolNO).
233 * Charge groups that fulfill the conditions but are not identical to the
234 * most common one will be marked as esolNO in the solvent_type array.
236 * TIP3p is identical to SPC for these purposes, so we call it
237 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
239 * NOTE: QM particle should not
240 * become an optimized solvent. Not even if there is only one charge
250 } solvent_parameters_t;
253 check_solvent_cg(const gmx_moltype_t *molt,
256 const unsigned char *qm_grpnr,
257 const t_grps *qm_grps,
259 int *n_solvent_parameters,
260 solvent_parameters_t **solvent_parameters_p,
264 const t_blocka *excl;
271 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
272 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
275 solvent_parameters_t *solvent_parameters;
277 /* We use a list with parameters for each solvent type.
278 * Every time we discover a new molecule that fulfills the basic
279 * conditions for a solvent we compare with the previous entries
280 * in these lists. If the parameters are the same we just increment
281 * the counter for that type, and otherwise we create a new type
282 * based on the current molecule.
284 * Once we've finished going through all molecules we check which
285 * solvent is most common, and mark all those molecules while we
286 * clear the flag on all others.
289 solvent_parameters = *solvent_parameters_p;
291 /* Mark the cg first as non optimized */
294 /* Check if this cg has no exclusions with atoms in other charge groups
295 * and all atoms inside the charge group excluded.
296 * We only have 3 or 4 atom solvent loops.
298 if (GET_CGINFO_EXCL_INTER(cginfo) ||
299 !GET_CGINFO_EXCL_INTRA(cginfo))
304 /* Get the indices of the first atom in this charge group */
305 j0 = molt->cgs.index[cg0];
306 j1 = molt->cgs.index[cg0+1];
308 /* Number of atoms in our molecule */
314 "Moltype '%s': there are %d atoms in this charge group\n",
318 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
321 if (nj < 3 || nj > 4)
326 /* Check if we are doing QM on this group */
328 if (qm_grpnr != NULL)
330 for (j = j0; j < j1 && !qm; j++)
332 qm = (qm_grpnr[j] < qm_grps->nr - 1);
335 /* Cannot use solvent optimization with QM */
341 atom = molt->atoms.atom;
343 /* Still looks like a solvent, time to check parameters */
345 /* If it is perturbed (free energy) we can't use the solvent loops,
346 * so then we just skip to the next molecule.
350 for (j = j0; j < j1 && !perturbed; j++)
352 perturbed = PERTURBED(atom[j]);
360 /* Now it's only a question if the VdW and charge parameters
361 * are OK. Before doing the check we compare and see if they are
362 * identical to a possible previous solvent type.
363 * First we assign the current types and charges.
365 for (j = 0; j < nj; j++)
367 tmp_vdwtype[j] = atom[j0+j].type;
368 tmp_charge[j] = atom[j0+j].q;
371 /* Does it match any previous solvent type? */
372 for (k = 0; k < *n_solvent_parameters; k++)
377 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
378 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
379 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
384 /* Check that types & charges match for all atoms in molecule */
385 for (j = 0; j < nj && match == TRUE; j++)
387 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
391 if (tmp_charge[j] != solvent_parameters[k].charge[j])
398 /* Congratulations! We have a matched solvent.
399 * Flag it with this type for later processing.
402 solvent_parameters[k].count += nmol;
404 /* We are done with this charge group */
409 /* If we get here, we have a tentative new solvent type.
410 * Before we add it we must check that it fulfills the requirements
411 * of the solvent optimized loops. First determine which atoms have
414 for (j = 0; j < nj; j++)
417 tjA = tmp_vdwtype[j];
419 /* Go through all other tpes and see if any have non-zero
420 * VdW parameters when combined with this one.
422 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
424 /* We already checked that the atoms weren't perturbed,
425 * so we only need to check state A now.
429 has_vdw[j] = (has_vdw[j] ||
430 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
431 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
432 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
437 has_vdw[j] = (has_vdw[j] ||
438 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
439 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
444 /* Now we know all we need to make the final check and assignment. */
448 * For this we require thatn all atoms have charge,
449 * the charges on atom 2 & 3 should be the same, and only
450 * atom 1 might have VdW.
452 if (has_vdw[1] == FALSE &&
453 has_vdw[2] == FALSE &&
454 tmp_charge[0] != 0 &&
455 tmp_charge[1] != 0 &&
456 tmp_charge[2] == tmp_charge[1])
458 srenew(solvent_parameters, *n_solvent_parameters+1);
459 solvent_parameters[*n_solvent_parameters].model = esolSPC;
460 solvent_parameters[*n_solvent_parameters].count = nmol;
461 for (k = 0; k < 3; k++)
463 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
464 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
467 *cg_sp = *n_solvent_parameters;
468 (*n_solvent_parameters)++;
473 /* Or could it be a TIP4P?
474 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
475 * Only atom 1 mght have VdW.
477 if (has_vdw[1] == FALSE &&
478 has_vdw[2] == FALSE &&
479 has_vdw[3] == FALSE &&
480 tmp_charge[0] == 0 &&
481 tmp_charge[1] != 0 &&
482 tmp_charge[2] == tmp_charge[1] &&
485 srenew(solvent_parameters, *n_solvent_parameters+1);
486 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
487 solvent_parameters[*n_solvent_parameters].count = nmol;
488 for (k = 0; k < 4; k++)
490 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
491 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
494 *cg_sp = *n_solvent_parameters;
495 (*n_solvent_parameters)++;
499 *solvent_parameters_p = solvent_parameters;
503 check_solvent(FILE * fp,
504 const gmx_mtop_t * mtop,
506 cginfo_mb_t *cginfo_mb)
509 const t_block * mols;
510 const gmx_moltype_t *molt;
511 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
512 int n_solvent_parameters;
513 solvent_parameters_t *solvent_parameters;
519 fprintf(debug, "Going to determine what solvent types we have.\n");
524 n_solvent_parameters = 0;
525 solvent_parameters = NULL;
526 /* Allocate temporary array for solvent type */
527 snew(cg_sp, mtop->nmolblock);
531 for (mb = 0; mb < mtop->nmolblock; mb++)
533 molt = &mtop->moltype[mtop->molblock[mb].type];
535 /* Here we have to loop over all individual molecules
536 * because we need to check for QMMM particles.
538 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
539 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
540 nmol = mtop->molblock[mb].nmol/nmol_ch;
541 for (mol = 0; mol < nmol_ch; mol++)
544 am = mol*cgs->index[cgs->nr];
545 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
547 check_solvent_cg(molt, cg_mol, nmol,
548 mtop->groups.grpnr[egcQMMM] ?
549 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
550 &mtop->groups.grps[egcQMMM],
552 &n_solvent_parameters, &solvent_parameters,
553 cginfo_mb[mb].cginfo[cgm+cg_mol],
554 &cg_sp[mb][cgm+cg_mol]);
557 cg_offset += cgs->nr;
558 at_offset += cgs->index[cgs->nr];
561 /* Puh! We finished going through all charge groups.
562 * Now find the most common solvent model.
565 /* Most common solvent this far */
567 for (i = 0; i < n_solvent_parameters; i++)
570 solvent_parameters[i].count > solvent_parameters[bestsp].count)
578 bestsol = solvent_parameters[bestsp].model;
585 #ifdef DISABLE_WATER_NLIST
590 for (mb = 0; mb < mtop->nmolblock; mb++)
592 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
593 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
594 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
596 if (cg_sp[mb][i] == bestsp)
598 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
603 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
610 if (bestsol != esolNO && fp != NULL)
612 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
614 solvent_parameters[bestsp].count);
617 sfree(solvent_parameters);
618 fr->solvent_opt = bestsol;
622 acNONE = 0, acCONSTRAINT, acSETTLE
625 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
626 t_forcerec *fr, gmx_bool bNoSolvOpt,
627 gmx_bool *bFEP_NonBonded,
628 gmx_bool *bExcl_IntraCGAll_InterCGNone)
631 const t_blocka *excl;
632 const gmx_moltype_t *molt;
633 const gmx_molblock_t *molb;
634 cginfo_mb_t *cginfo_mb;
637 int cg_offset, a_offset, cgm, am;
638 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
642 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
644 ncg_tot = ncg_mtop(mtop);
645 snew(cginfo_mb, mtop->nmolblock);
647 snew(type_VDW, fr->ntype);
648 for (ai = 0; ai < fr->ntype; ai++)
650 type_VDW[ai] = FALSE;
651 for (j = 0; j < fr->ntype; j++)
653 type_VDW[ai] = type_VDW[ai] ||
655 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
656 C12(fr->nbfp, fr->ntype, ai, j) != 0;
660 *bFEP_NonBonded = FALSE;
661 *bExcl_IntraCGAll_InterCGNone = TRUE;
664 snew(bExcl, excl_nalloc);
667 for (mb = 0; mb < mtop->nmolblock; mb++)
669 molb = &mtop->molblock[mb];
670 molt = &mtop->moltype[molb->type];
674 /* Check if the cginfo is identical for all molecules in this block.
675 * If so, we only need an array of the size of one molecule.
676 * Otherwise we make an array of #mol times #cgs per molecule.
680 for (m = 0; m < molb->nmol; m++)
682 am = m*cgs->index[cgs->nr];
683 for (cg = 0; cg < cgs->nr; cg++)
686 a1 = cgs->index[cg+1];
687 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
688 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
692 if (mtop->groups.grpnr[egcQMMM] != NULL)
694 for (ai = a0; ai < a1; ai++)
696 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
697 mtop->groups.grpnr[egcQMMM][a_offset +ai])
706 cginfo_mb[mb].cg_start = cg_offset;
707 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
708 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
709 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
710 cginfo = cginfo_mb[mb].cginfo;
712 /* Set constraints flags for constrained atoms */
713 snew(a_con, molt->atoms.nr);
714 for (ftype = 0; ftype < F_NRE; ftype++)
716 if (interaction_function[ftype].flags & IF_CONSTRAINT)
721 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
725 for (a = 0; a < nral; a++)
727 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
728 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
734 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
737 am = m*cgs->index[cgs->nr];
738 for (cg = 0; cg < cgs->nr; cg++)
741 a1 = cgs->index[cg+1];
743 /* Store the energy group in cginfo */
744 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
745 SET_CGINFO_GID(cginfo[cgm+cg], gid);
747 /* Check the intra/inter charge group exclusions */
748 if (a1-a0 > excl_nalloc)
750 excl_nalloc = a1 - a0;
751 srenew(bExcl, excl_nalloc);
753 /* bExclIntraAll: all intra cg interactions excluded
754 * bExclInter: any inter cg interactions excluded
756 bExclIntraAll = TRUE;
760 bHavePerturbedAtoms = FALSE;
761 for (ai = a0; ai < a1; ai++)
763 /* Check VDW and electrostatic interactions */
764 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
765 type_VDW[molt->atoms.atom[ai].typeB]);
766 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
767 molt->atoms.atom[ai].qB != 0);
769 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
771 /* Clear the exclusion list for atom ai */
772 for (aj = a0; aj < a1; aj++)
774 bExcl[aj-a0] = FALSE;
776 /* Loop over all the exclusions of atom ai */
777 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
780 if (aj < a0 || aj >= a1)
789 /* Check if ai excludes a0 to a1 */
790 for (aj = a0; aj < a1; aj++)
794 bExclIntraAll = FALSE;
801 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
804 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
812 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
816 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
818 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
820 /* The size in cginfo is currently only read with DD */
821 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
825 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
829 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
831 if (bHavePerturbedAtoms && fr->efep != efepNO)
833 SET_CGINFO_FEP(cginfo[cgm+cg]);
834 *bFEP_NonBonded = TRUE;
836 /* Store the charge group size */
837 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
839 if (!bExclIntraAll || bExclInter)
841 *bExcl_IntraCGAll_InterCGNone = FALSE;
848 cg_offset += molb->nmol*cgs->nr;
849 a_offset += molb->nmol*cgs->index[cgs->nr];
853 /* the solvent optimizer is called after the QM is initialized,
854 * because we don't want to have the QM subsystemto become an
858 check_solvent(fplog, mtop, fr, cginfo_mb);
860 if (getenv("GMX_NO_SOLV_OPT"))
864 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
865 "Disabling all solvent optimization\n");
867 fr->solvent_opt = esolNO;
871 fr->solvent_opt = esolNO;
873 if (!fr->solvent_opt)
875 for (mb = 0; mb < mtop->nmolblock; mb++)
877 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
879 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
887 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
892 ncg = cgi_mb[nmb-1].cg_end;
895 for (cg = 0; cg < ncg; cg++)
897 while (cg >= cgi_mb[mb].cg_end)
902 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
908 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
910 /*This now calculates sum for q and c6*/
911 double qsum, q2sum, q, c6sum, c6;
913 const t_atoms *atoms;
918 for (mb = 0; mb < mtop->nmolblock; mb++)
920 nmol = mtop->molblock[mb].nmol;
921 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
922 for (i = 0; i < atoms->nr; i++)
924 q = atoms->atom[i].q;
927 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
932 fr->q2sum[0] = q2sum;
933 fr->c6sum[0] = c6sum;
935 if (fr->efep != efepNO)
940 for (mb = 0; mb < mtop->nmolblock; mb++)
942 nmol = mtop->molblock[mb].nmol;
943 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
944 for (i = 0; i < atoms->nr; i++)
946 q = atoms->atom[i].qB;
949 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
953 fr->q2sum[1] = q2sum;
954 fr->c6sum[1] = c6sum;
959 fr->qsum[1] = fr->qsum[0];
960 fr->q2sum[1] = fr->q2sum[0];
961 fr->c6sum[1] = fr->c6sum[0];
965 if (fr->efep == efepNO)
967 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
971 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
972 fr->qsum[0], fr->qsum[1]);
977 void update_forcerec(t_forcerec *fr, matrix box)
979 if (fr->eeltype == eelGRF)
981 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
982 fr->rcoulomb, fr->temp, fr->zsquare, box,
983 &fr->kappa, &fr->k_rf, &fr->c_rf);
987 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
989 const t_atoms *atoms, *atoms_tpi;
990 const t_blocka *excl;
991 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
992 gmx_int64_t npair, npair_ij, tmpi, tmpj;
993 double csix, ctwelve;
997 real *nbfp_comb = NULL;
1003 /* For LJ-PME, we want to correct for the difference between the
1004 * actual C6 values and the C6 values used by the LJ-PME based on
1005 * combination rules. */
1007 if (EVDW_PME(fr->vdwtype))
1009 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1010 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1011 for (tpi = 0; tpi < ntp; ++tpi)
1013 for (tpj = 0; tpj < ntp; ++tpj)
1015 C6(nbfp_comb, ntp, tpi, tpj) =
1016 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1017 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1022 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1030 /* Count the types so we avoid natoms^2 operations */
1031 snew(typecount, ntp);
1032 gmx_mtop_count_atomtypes(mtop, q, typecount);
1034 for (tpi = 0; tpi < ntp; tpi++)
1036 for (tpj = tpi; tpj < ntp; tpj++)
1038 tmpi = typecount[tpi];
1039 tmpj = typecount[tpj];
1042 npair_ij = tmpi*tmpj;
1046 npair_ij = tmpi*(tmpi - 1)/2;
1050 /* nbfp now includes the 6.0 derivative prefactor */
1051 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1055 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1056 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1057 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1063 /* Subtract the excluded pairs.
1064 * The main reason for substracting exclusions is that in some cases
1065 * some combinations might never occur and the parameters could have
1066 * any value. These unused values should not influence the dispersion
1069 for (mb = 0; mb < mtop->nmolblock; mb++)
1071 nmol = mtop->molblock[mb].nmol;
1072 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1073 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1074 for (i = 0; (i < atoms->nr); i++)
1078 tpi = atoms->atom[i].type;
1082 tpi = atoms->atom[i].typeB;
1084 j1 = excl->index[i];
1085 j2 = excl->index[i+1];
1086 for (j = j1; j < j2; j++)
1093 tpj = atoms->atom[k].type;
1097 tpj = atoms->atom[k].typeB;
1101 /* nbfp now includes the 6.0 derivative prefactor */
1102 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1106 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1107 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1108 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1118 /* Only correct for the interaction of the test particle
1119 * with the rest of the system.
1122 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1125 for (mb = 0; mb < mtop->nmolblock; mb++)
1127 nmol = mtop->molblock[mb].nmol;
1128 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1129 for (j = 0; j < atoms->nr; j++)
1132 /* Remove the interaction of the test charge group
1135 if (mb == mtop->nmolblock-1)
1139 if (mb == 0 && nmol == 1)
1141 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1146 tpj = atoms->atom[j].type;
1150 tpj = atoms->atom[j].typeB;
1152 for (i = 0; i < fr->n_tpi; i++)
1156 tpi = atoms_tpi->atom[i].type;
1160 tpi = atoms_tpi->atom[i].typeB;
1164 /* nbfp now includes the 6.0 derivative prefactor */
1165 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1169 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1170 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1171 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1178 if (npair - nexcl <= 0 && fplog)
1180 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1186 csix /= npair - nexcl;
1187 ctwelve /= npair - nexcl;
1191 fprintf(debug, "Counted %d exclusions\n", nexcl);
1192 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1193 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1195 fr->avcsix[q] = csix;
1196 fr->avctwelve[q] = ctwelve;
1199 if (EVDW_PME(fr->vdwtype))
1206 if (fr->eDispCorr == edispcAllEner ||
1207 fr->eDispCorr == edispcAllEnerPres)
1209 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1210 fr->avcsix[0], fr->avctwelve[0]);
1214 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1220 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1221 const gmx_mtop_t *mtop)
1223 const t_atoms *at1, *at2;
1224 int mt1, mt2, i, j, tpi, tpj, ntypes;
1230 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1237 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1239 at1 = &mtop->moltype[mt1].atoms;
1240 for (i = 0; (i < at1->nr); i++)
1242 tpi = at1->atom[i].type;
1245 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1248 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1250 at2 = &mtop->moltype[mt2].atoms;
1251 for (j = 0; (j < at2->nr); j++)
1253 tpj = at2->atom[j].type;
1256 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1258 b = BHAMB(nbfp, ntypes, tpi, tpj);
1259 if (b > fr->bham_b_max)
1263 if ((b < bmin) || (bmin == -1))
1273 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1274 bmin, fr->bham_b_max);
1278 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1279 t_forcerec *fr, real rtab,
1280 const t_commrec *cr,
1281 const char *tabfn, char *eg1, char *eg2,
1291 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1296 sprintf(buf, "%s", tabfn);
1299 /* Append the two energy group names */
1300 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1301 eg1, eg2, ftp2ext(efXVG));
1303 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1304 /* Copy the contents of the table to separate coulomb and LJ tables too,
1305 * to improve cache performance.
1307 /* For performance reasons we want
1308 * the table data to be aligned to 16-byte. The pointers could be freed
1309 * but currently aren't.
1311 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1312 nbl->table_elec.format = nbl->table_elec_vdw.format;
1313 nbl->table_elec.r = nbl->table_elec_vdw.r;
1314 nbl->table_elec.n = nbl->table_elec_vdw.n;
1315 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1316 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1317 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1318 nbl->table_elec.ninteractions = 1;
1319 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1320 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1322 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1323 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1324 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1325 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1326 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1327 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1328 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1329 nbl->table_vdw.ninteractions = 2;
1330 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1331 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1333 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1335 for (j = 0; j < 4; j++)
1337 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1339 for (j = 0; j < 8; j++)
1341 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1346 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1347 int *ncount, int **count)
1349 const gmx_moltype_t *molt;
1351 int mt, ftype, stride, i, j, tabnr;
1353 for (mt = 0; mt < mtop->nmoltype; mt++)
1355 molt = &mtop->moltype[mt];
1356 for (ftype = 0; ftype < F_NRE; ftype++)
1358 if (ftype == ftype1 || ftype == ftype2)
1360 il = &molt->ilist[ftype];
1361 stride = 1 + NRAL(ftype);
1362 for (i = 0; i < il->nr; i += stride)
1364 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1367 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1369 if (tabnr >= *ncount)
1371 srenew(*count, tabnr+1);
1372 for (j = *ncount; j < tabnr+1; j++)
1385 static bondedtable_t *make_bonded_tables(FILE *fplog,
1386 int ftype1, int ftype2,
1387 const gmx_mtop_t *mtop,
1388 const char *basefn, const char *tabext)
1390 int i, ncount, *count;
1398 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1403 for (i = 0; i < ncount; i++)
1407 sprintf(tabfn, "%s", basefn);
1408 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1409 tabext, i, ftp2ext(efXVG));
1410 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1419 void forcerec_set_ranges(t_forcerec *fr,
1420 int ncg_home, int ncg_force,
1422 int natoms_force_constr, int natoms_f_novirsum)
1427 /* fr->ncg_force is unused in the standard code,
1428 * but it can be useful for modified code dealing with charge groups.
1430 fr->ncg_force = ncg_force;
1431 fr->natoms_force = natoms_force;
1432 fr->natoms_force_constr = natoms_force_constr;
1434 if (fr->natoms_force_constr > fr->nalloc_force)
1436 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1440 srenew(fr->f_twin, fr->nalloc_force);
1444 if (fr->bF_NoVirSum)
1446 fr->f_novirsum_n = natoms_f_novirsum;
1447 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1449 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1450 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1455 fr->f_novirsum_n = 0;
1459 static real cutoff_inf(real cutoff)
1463 cutoff = GMX_CUTOFF_INF;
1469 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1470 t_forcerec *fr, const t_inputrec *ir,
1471 const char *tabfn, const gmx_mtop_t *mtop,
1479 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1483 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1485 sprintf(buf, "%s", tabfn);
1486 for (i = 0; i < ir->adress->n_tf_grps; i++)
1488 j = ir->adress->tf_table_index[i]; /* get energy group index */
1489 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1490 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1493 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1495 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1500 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1507 ir->rcoulomb == 0 &&
1509 ir->ePBC == epbcNONE &&
1510 ir->vdwtype == evdwCUT &&
1511 ir->coulombtype == eelCUT &&
1512 ir->efep == efepNO &&
1513 (ir->implicit_solvent == eisNO ||
1514 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1515 ir->gb_algorithm == egbHCT ||
1516 ir->gb_algorithm == egbOBC))) &&
1517 getenv("GMX_NO_ALLVSALL") == NULL
1520 if (bAllvsAll && ir->opts.ngener > 1)
1522 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1528 fprintf(stderr, "\n%s\n", note);
1532 fprintf(fp, "\n%s\n", note);
1538 if (bAllvsAll && fp && MASTER(cr))
1540 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1547 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1551 /* These thread local data structures are used for bondeds only */
1552 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1554 if (fr->nthreads > 1)
1556 snew(fr->f_t, fr->nthreads);
1557 /* Thread 0 uses the global force and energy arrays */
1558 for (t = 1; t < fr->nthreads; t++)
1560 fr->f_t[t].f = NULL;
1561 fr->f_t[t].f_nalloc = 0;
1562 snew(fr->f_t[t].fshift, SHIFTS);
1563 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1564 for (i = 0; i < egNR; i++)
1566 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1573 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1574 const t_commrec *cr,
1575 const t_inputrec *ir,
1578 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1580 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1581 bGPU ? "GPUs" : "SIMD kernels",
1582 bGPU ? "CPU only" : "plain-C kernels");
1590 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1594 *kernel_type = nbnxnk4x4_PlainC;
1595 *ewald_excl = ewaldexclTable;
1597 #ifdef GMX_NBNXN_SIMD
1599 #ifdef GMX_NBNXN_SIMD_4XN
1600 *kernel_type = nbnxnk4xN_SIMD_4xN;
1602 #ifdef GMX_NBNXN_SIMD_2XNN
1603 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1606 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1607 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1608 * Currently this is based on the SIMD acceleration choice,
1609 * but it might be better to decide this at runtime based on CPU.
1611 * 4xN calculates more (zero) interactions, but has less pair-search
1612 * work and much better kernel instruction scheduling.
1614 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1615 * which doesn't have FMA, both the analytical and tabulated Ewald
1616 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1617 * 2x(4+4) because it results in significantly fewer pairs.
1618 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1619 * 10% with HT, 50% without HT. As we currently don't detect the actual
1620 * use of HT, use 4x8 to avoid a potential performance hit.
1621 * On Intel Haswell 4x8 is always faster.
1623 *kernel_type = nbnxnk4xN_SIMD_4xN;
1625 #ifndef GMX_SIMD_HAVE_FMA
1626 if (EEL_PME_EWALD(ir->coulombtype) ||
1627 EVDW_PME(ir->vdwtype))
1629 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1630 * There are enough instructions to make 2x(4+4) efficient.
1632 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1635 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1638 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1640 #ifdef GMX_NBNXN_SIMD_4XN
1641 *kernel_type = nbnxnk4xN_SIMD_4xN;
1643 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1646 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1648 #ifdef GMX_NBNXN_SIMD_2XNN
1649 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1651 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1655 /* Analytical Ewald exclusion correction is only an option in
1657 * Since table lookup's don't parallelize with SIMD, analytical
1658 * will probably always be faster for a SIMD width of 8 or more.
1659 * With FMA analytical is sometimes faster for a width if 4 as well.
1660 * On BlueGene/Q, this is faster regardless of precision.
1661 * In single precision, this is faster on Bulldozer.
1663 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1664 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1665 defined GMX_SIMD_IBM_QPX
1666 *ewald_excl = ewaldexclAnalytical;
1668 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1670 *ewald_excl = ewaldexclTable;
1672 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1674 *ewald_excl = ewaldexclAnalytical;
1678 #endif /* GMX_NBNXN_SIMD */
1682 const char *lookup_nbnxn_kernel_name(int kernel_type)
1684 const char *returnvalue = NULL;
1685 switch (kernel_type)
1688 returnvalue = "not set";
1690 case nbnxnk4x4_PlainC:
1691 returnvalue = "plain C";
1693 case nbnxnk4xN_SIMD_4xN:
1694 case nbnxnk4xN_SIMD_2xNN:
1695 #ifdef GMX_NBNXN_SIMD
1696 #if defined GMX_SIMD_X86_SSE2
1697 returnvalue = "SSE2";
1698 #elif defined GMX_SIMD_X86_SSE4_1
1699 returnvalue = "SSE4.1";
1700 #elif defined GMX_SIMD_X86_AVX_128_FMA
1701 returnvalue = "AVX_128_FMA";
1702 #elif defined GMX_SIMD_X86_AVX_256
1703 returnvalue = "AVX_256";
1704 #elif defined GMX_SIMD_X86_AVX2_256
1705 returnvalue = "AVX2_256";
1707 returnvalue = "SIMD";
1709 #else /* GMX_NBNXN_SIMD */
1710 returnvalue = "not available";
1711 #endif /* GMX_NBNXN_SIMD */
1713 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1714 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1718 gmx_fatal(FARGS, "Illegal kernel type selected");
1725 static void pick_nbnxn_kernel(FILE *fp,
1726 const t_commrec *cr,
1727 gmx_bool use_simd_kernels,
1729 gmx_bool bEmulateGPU,
1730 const t_inputrec *ir,
1733 gmx_bool bDoNonbonded)
1735 assert(kernel_type);
1737 *kernel_type = nbnxnkNotSet;
1738 *ewald_excl = ewaldexclTable;
1742 *kernel_type = nbnxnk8x8x8_PlainC;
1746 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1751 *kernel_type = nbnxnk8x8x8_CUDA;
1754 if (*kernel_type == nbnxnkNotSet)
1756 /* LJ PME with LB combination rule does 7 mesh operations.
1757 * This so slow that we don't compile SIMD non-bonded kernels for that.
1759 if (use_simd_kernels &&
1760 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1762 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1766 *kernel_type = nbnxnk4x4_PlainC;
1770 if (bDoNonbonded && fp != NULL)
1772 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1773 lookup_nbnxn_kernel_name(*kernel_type),
1774 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1775 nbnxn_kernel_to_cj_size(*kernel_type));
1779 static void pick_nbnxn_resources(const t_commrec *cr,
1780 const gmx_hw_info_t *hwinfo,
1781 gmx_bool bDoNonbonded,
1783 gmx_bool *bEmulateGPU,
1784 const gmx_gpu_opt_t *gpu_opt)
1786 gmx_bool bEmulateGPUEnvVarSet;
1787 char gpu_err_str[STRLEN];
1791 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1793 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1794 * GPUs (currently) only handle non-bonded calculations, we will
1795 * automatically switch to emulation if non-bonded calculations are
1796 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1797 * way to turn off GPU initialization, data movement, and cleanup.
1799 * GPU emulation can be useful to assess the performance one can expect by
1800 * adding GPU(s) to the machine. The conditional below allows this even
1801 * if mdrun is compiled without GPU acceleration support.
1802 * Note that you should freezing the system as otherwise it will explode.
1804 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1806 gpu_opt->ncuda_dev_use > 0));
1808 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1810 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1812 /* Each PP node will use the intra-node id-th device from the
1813 * list of detected/selected GPUs. */
1814 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1815 &hwinfo->gpu_info, gpu_opt))
1817 /* At this point the init should never fail as we made sure that
1818 * we have all the GPUs we need. If it still does, we'll bail. */
1819 gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1821 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1822 cr->rank_pp_intranode),
1826 /* Here we actually turn on hardware GPU acceleration */
1831 gmx_bool uses_simple_tables(int cutoff_scheme,
1832 nonbonded_verlet_t *nbv,
1835 gmx_bool bUsesSimpleTables = TRUE;
1838 switch (cutoff_scheme)
1841 bUsesSimpleTables = TRUE;
1844 assert(NULL != nbv && NULL != nbv->grp);
1845 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1846 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1849 gmx_incons("unimplemented");
1851 return bUsesSimpleTables;
1854 static void init_ewald_f_table(interaction_const_t *ic,
1855 gmx_bool bUsesSimpleTables,
1860 if (bUsesSimpleTables)
1862 /* With a spacing of 0.0005 we are at the force summation accuracy
1863 * for the SSE kernels for "normal" atomistic simulations.
1865 ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1868 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1869 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1873 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1874 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1875 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1878 sfree_aligned(ic->tabq_coul_FDV0);
1879 sfree_aligned(ic->tabq_coul_F);
1880 sfree_aligned(ic->tabq_coul_V);
1882 sfree_aligned(ic->tabq_vdw_FDV0);
1883 sfree_aligned(ic->tabq_vdw_F);
1884 sfree_aligned(ic->tabq_vdw_V);
1886 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1888 /* Create the original table data in FDV0 */
1889 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1890 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1891 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1892 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1893 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1896 if (EVDW_PME(ic->vdwtype))
1898 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1899 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1900 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1901 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1902 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1906 void init_interaction_const_tables(FILE *fp,
1907 interaction_const_t *ic,
1908 gmx_bool bUsesSimpleTables,
1913 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1915 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1919 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1920 1/ic->tabq_scale, ic->tabq_size);
1925 static void clear_force_switch_constants(shift_consts_t *sc)
1932 static void force_switch_constants(real p,
1936 /* Here we determine the coefficient for shifting the force to zero
1937 * between distance rsw and the cut-off rc.
1938 * For a potential of r^-p, we have force p*r^-(p+1).
1939 * But to save flops we absorb p in the coefficient.
1941 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1942 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1944 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1945 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1946 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1949 static void potential_switch_constants(real rsw, real rc,
1950 switch_consts_t *sc)
1952 /* The switch function is 1 at rsw and 0 at rc.
1953 * The derivative and second derivate are zero at both ends.
1954 * rsw = max(r - r_switch, 0)
1955 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1956 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1957 * force = force*dsw - potential*sw
1960 sc->c3 = -10*pow(rc - rsw, -3);
1961 sc->c4 = 15*pow(rc - rsw, -4);
1962 sc->c5 = -6*pow(rc - rsw, -5);
1966 init_interaction_const(FILE *fp,
1967 const t_commrec gmx_unused *cr,
1968 interaction_const_t **interaction_const,
1969 const t_forcerec *fr,
1972 interaction_const_t *ic;
1973 gmx_bool bUsesSimpleTables = TRUE;
1977 /* Just allocate something so we can free it */
1978 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1979 snew_aligned(ic->tabq_coul_F, 16, 32);
1980 snew_aligned(ic->tabq_coul_V, 16, 32);
1982 ic->rlist = fr->rlist;
1983 ic->rlistlong = fr->rlistlong;
1986 ic->vdwtype = fr->vdwtype;
1987 ic->vdw_modifier = fr->vdw_modifier;
1988 ic->rvdw = fr->rvdw;
1989 ic->rvdw_switch = fr->rvdw_switch;
1990 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1991 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1992 ic->sh_lj_ewald = 0;
1993 clear_force_switch_constants(&ic->dispersion_shift);
1994 clear_force_switch_constants(&ic->repulsion_shift);
1996 switch (ic->vdw_modifier)
1998 case eintmodPOTSHIFT:
1999 /* Only shift the potential, don't touch the force */
2000 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2001 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2002 if (EVDW_PME(ic->vdwtype))
2006 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2007 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2010 case eintmodFORCESWITCH:
2011 /* Switch the force, switch and shift the potential */
2012 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2013 &ic->dispersion_shift);
2014 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2015 &ic->repulsion_shift);
2017 case eintmodPOTSWITCH:
2018 /* Switch the potential and force */
2019 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2023 case eintmodEXACTCUTOFF:
2024 /* Nothing to do here */
2027 gmx_incons("unimplemented potential modifier");
2030 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2032 /* Electrostatics */
2033 ic->eeltype = fr->eeltype;
2034 ic->coulomb_modifier = fr->coulomb_modifier;
2035 ic->rcoulomb = fr->rcoulomb;
2036 ic->epsilon_r = fr->epsilon_r;
2037 ic->epsfac = fr->epsfac;
2038 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2040 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2042 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2049 /* Reaction-field */
2050 if (EEL_RF(ic->eeltype))
2052 ic->epsilon_rf = fr->epsilon_rf;
2053 ic->k_rf = fr->k_rf;
2054 ic->c_rf = fr->c_rf;
2058 /* For plain cut-off we might use the reaction-field kernels */
2059 ic->epsilon_rf = ic->epsilon_r;
2061 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2063 ic->c_rf = 1/ic->rcoulomb;
2073 real dispersion_shift;
2075 dispersion_shift = ic->dispersion_shift.cpot;
2076 if (EVDW_PME(ic->vdwtype))
2078 dispersion_shift -= ic->sh_lj_ewald;
2080 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2081 ic->repulsion_shift.cpot, dispersion_shift);
2083 if (ic->eeltype == eelCUT)
2085 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2087 else if (EEL_PME(ic->eeltype))
2089 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2094 *interaction_const = ic;
2096 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2098 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2100 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2101 * also sharing texture references. To keep the code simple, we don't
2102 * treat texture references as shared resources, but this means that
2103 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2104 * Hence, to ensure that the non-bonded kernels don't start before all
2105 * texture binding operations are finished, we need to wait for all ranks
2106 * to arrive here before continuing.
2108 * Note that we could omit this barrier if GPUs are not shared (or
2109 * texture objects are used), but as this is initialization code, there
2110 * is not point in complicating things.
2112 #ifdef GMX_THREAD_MPI
2117 #endif /* GMX_THREAD_MPI */
2120 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2121 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2124 static void init_nb_verlet(FILE *fp,
2125 nonbonded_verlet_t **nb_verlet,
2126 gmx_bool bFEP_NonBonded,
2127 const t_inputrec *ir,
2128 const t_forcerec *fr,
2129 const t_commrec *cr,
2130 const char *nbpu_opt)
2132 nonbonded_verlet_t *nbv;
2135 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2137 nbnxn_alloc_t *nb_alloc;
2138 nbnxn_free_t *nb_free;
2142 pick_nbnxn_resources(cr, fr->hwinfo,
2150 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2151 for (i = 0; i < nbv->ngrp; i++)
2153 nbv->grp[i].nbl_lists.nnbl = 0;
2154 nbv->grp[i].nbat = NULL;
2155 nbv->grp[i].kernel_type = nbnxnkNotSet;
2157 if (i == 0) /* local */
2159 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2160 nbv->bUseGPU, bEmulateGPU, ir,
2161 &nbv->grp[i].kernel_type,
2162 &nbv->grp[i].ewald_excl,
2165 else /* non-local */
2167 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2169 /* Use GPU for local, select a CPU kernel for non-local */
2170 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2172 &nbv->grp[i].kernel_type,
2173 &nbv->grp[i].ewald_excl,
2176 bHybridGPURun = TRUE;
2180 /* Use the same kernel for local and non-local interactions */
2181 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2182 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2189 /* init the NxN GPU data; the last argument tells whether we'll have
2190 * both local and non-local NB calculation on GPU */
2191 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2192 &fr->hwinfo->gpu_info, fr->gpu_opt,
2193 cr->rank_pp_intranode,
2194 (nbv->ngrp > 1) && !bHybridGPURun);
2196 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2200 nbv->min_ci_balanced = strtol(env, &end, 10);
2201 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2203 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2208 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2209 nbv->min_ci_balanced);
2214 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2217 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2218 nbv->min_ci_balanced);
2224 nbv->min_ci_balanced = 0;
2229 nbnxn_init_search(&nbv->nbs,
2230 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2231 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2233 gmx_omp_nthreads_get(emntNonbonded));
2235 for (i = 0; i < nbv->ngrp; i++)
2237 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2239 nb_alloc = &pmalloc;
2248 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2249 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2250 /* 8x8x8 "non-simple" lists are ATM always combined */
2251 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2255 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2257 gmx_bool bSimpleList;
2258 int enbnxninitcombrule;
2260 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2262 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2264 /* Plain LJ cut-off: we can optimize with combination rules */
2265 enbnxninitcombrule = enbnxninitcombruleDETECT;
2267 else if (fr->vdwtype == evdwPME)
2269 /* LJ-PME: we need to use a combination rule for the grid */
2270 if (fr->ljpme_combination_rule == eljpmeGEOM)
2272 enbnxninitcombrule = enbnxninitcombruleGEOM;
2276 enbnxninitcombrule = enbnxninitcombruleLB;
2281 /* We use a full combination matrix: no rule required */
2282 enbnxninitcombrule = enbnxninitcombruleNONE;
2286 snew(nbv->grp[i].nbat, 1);
2287 nbnxn_atomdata_init(fp,
2289 nbv->grp[i].kernel_type,
2291 fr->ntype, fr->nbfp,
2293 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2298 nbv->grp[i].nbat = nbv->grp[0].nbat;
2303 void init_forcerec(FILE *fp,
2304 const output_env_t oenv,
2307 const t_inputrec *ir,
2308 const gmx_mtop_t *mtop,
2309 const t_commrec *cr,
2315 const char *nbpu_opt,
2316 gmx_bool bNoSolvOpt,
2319 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2324 gmx_bool bGenericKernelOnly;
2325 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2326 gmx_bool bFEP_NonBonded;
2328 int *nm_ind, egp_flags;
2330 if (fr->hwinfo == NULL)
2332 /* Detect hardware, gather information.
2333 * In mdrun, hwinfo has already been set before calling init_forcerec.
2334 * Here we ignore GPUs, as tools will not use them anyhow.
2336 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2339 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2340 fr->use_simd_kernels = TRUE;
2342 fr->bDomDec = DOMAINDECOMP(cr);
2344 natoms = mtop->natoms;
2346 if (check_box(ir->ePBC, box))
2348 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2351 /* Test particle insertion ? */
2354 /* Set to the size of the molecule to be inserted (the last one) */
2355 /* Because of old style topologies, we have to use the last cg
2356 * instead of the last molecule type.
2358 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2359 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2360 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2362 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2370 /* Copy AdResS parameters */
2373 fr->adress_type = ir->adress->type;
2374 fr->adress_const_wf = ir->adress->const_wf;
2375 fr->adress_ex_width = ir->adress->ex_width;
2376 fr->adress_hy_width = ir->adress->hy_width;
2377 fr->adress_icor = ir->adress->icor;
2378 fr->adress_site = ir->adress->site;
2379 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2380 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2383 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2384 for (i = 0; i < ir->adress->n_energy_grps; i++)
2386 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2389 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2390 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2391 for (i = 0; i < fr->n_adress_tf_grps; i++)
2393 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2395 copy_rvec(ir->adress->refs, fr->adress_refs);
2399 fr->adress_type = eAdressOff;
2400 fr->adress_do_hybridpairs = FALSE;
2403 /* Copy the user determined parameters */
2404 fr->userint1 = ir->userint1;
2405 fr->userint2 = ir->userint2;
2406 fr->userint3 = ir->userint3;
2407 fr->userint4 = ir->userint4;
2408 fr->userreal1 = ir->userreal1;
2409 fr->userreal2 = ir->userreal2;
2410 fr->userreal3 = ir->userreal3;
2411 fr->userreal4 = ir->userreal4;
2414 fr->fc_stepsize = ir->fc_stepsize;
2417 fr->efep = ir->efep;
2418 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2419 if (ir->fepvals->bScCoul)
2421 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2422 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2426 fr->sc_alphacoul = 0;
2427 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2429 fr->sc_power = ir->fepvals->sc_power;
2430 fr->sc_r_power = ir->fepvals->sc_r_power;
2431 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2433 env = getenv("GMX_SCSIGMA_MIN");
2437 sscanf(env, "%lf", &dbl);
2438 fr->sc_sigma6_min = pow(dbl, 6);
2441 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2445 fr->bNonbonded = TRUE;
2446 if (getenv("GMX_NO_NONBONDED") != NULL)
2448 /* turn off non-bonded calculations */
2449 fr->bNonbonded = FALSE;
2450 md_print_warn(cr, fp,
2451 "Found environment variable GMX_NO_NONBONDED.\n"
2452 "Disabling nonbonded calculations.\n");
2455 bGenericKernelOnly = FALSE;
2457 /* We now check in the NS code whether a particular combination of interactions
2458 * can be used with water optimization, and disable it if that is not the case.
2461 if (getenv("GMX_NB_GENERIC") != NULL)
2466 "Found environment variable GMX_NB_GENERIC.\n"
2467 "Disabling all interaction-specific nonbonded kernels, will only\n"
2468 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2470 bGenericKernelOnly = TRUE;
2473 if (bGenericKernelOnly == TRUE)
2478 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2480 fr->use_simd_kernels = FALSE;
2484 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2485 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2489 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2491 /* Check if we can/should do all-vs-all kernels */
2492 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2493 fr->AllvsAll_work = NULL;
2494 fr->AllvsAll_workgb = NULL;
2496 /* All-vs-all kernels have not been implemented in 4.6, and
2497 * the SIMD group kernels are also buggy in this case. Non-SIMD
2498 * group kernels are OK. See Redmine #1249. */
2501 fr->bAllvsAll = FALSE;
2502 fr->use_simd_kernels = FALSE;
2506 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2507 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2508 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2509 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2510 "we are proceeding by disabling all CPU architecture-specific\n"
2511 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2512 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2516 /* Neighbour searching stuff */
2517 fr->cutoff_scheme = ir->cutoff_scheme;
2518 fr->bGrid = (ir->ns_type == ensGRID);
2519 fr->ePBC = ir->ePBC;
2521 if (fr->cutoff_scheme == ecutsGROUP)
2523 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2524 "removed in a future release when 'verlet' supports all interaction forms.\n";
2528 fprintf(stderr, "\n%s\n", note);
2532 fprintf(fp, "\n%s\n", note);
2536 /* Determine if we will do PBC for distances in bonded interactions */
2537 if (fr->ePBC == epbcNONE)
2539 fr->bMolPBC = FALSE;
2543 if (!DOMAINDECOMP(cr))
2545 /* The group cut-off scheme and SHAKE assume charge groups
2546 * are whole, but not using molpbc is faster in most cases.
2548 if (fr->cutoff_scheme == ecutsGROUP ||
2549 (ir->eConstrAlg == econtSHAKE &&
2550 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2551 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2553 fr->bMolPBC = ir->bPeriodicMols;
2558 if (getenv("GMX_USE_GRAPH") != NULL)
2560 fr->bMolPBC = FALSE;
2563 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2570 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2573 fr->bGB = (ir->implicit_solvent == eisGBSA);
2575 fr->rc_scaling = ir->refcoord_scaling;
2576 copy_rvec(ir->posres_com, fr->posres_com);
2577 copy_rvec(ir->posres_comB, fr->posres_comB);
2578 fr->rlist = cutoff_inf(ir->rlist);
2579 fr->rlistlong = cutoff_inf(ir->rlistlong);
2580 fr->eeltype = ir->coulombtype;
2581 fr->vdwtype = ir->vdwtype;
2582 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2584 fr->coulomb_modifier = ir->coulomb_modifier;
2585 fr->vdw_modifier = ir->vdw_modifier;
2587 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2588 switch (fr->eeltype)
2591 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2597 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2601 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2602 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2611 case eelPMEUSERSWITCH:
2612 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2617 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2621 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2625 /* Vdw: Translate from mdp settings to kernel format */
2626 switch (fr->vdwtype)
2631 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2635 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2639 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2645 case evdwENCADSHIFT:
2646 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2650 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2654 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2655 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2656 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2658 fr->bTwinRange = fr->rlistlong > fr->rlist;
2659 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2661 fr->reppow = mtop->ffparams.reppow;
2663 if (ir->cutoff_scheme == ecutsGROUP)
2665 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2666 && !EVDW_PME(fr->vdwtype));
2667 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2668 fr->bcoultab = !(fr->eeltype == eelCUT ||
2669 fr->eeltype == eelEWALD ||
2670 fr->eeltype == eelPME ||
2671 fr->eeltype == eelRF ||
2672 fr->eeltype == eelRF_ZERO);
2674 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2675 * going to be faster to tabulate the interaction than calling the generic kernel.
2677 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2679 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2681 fr->bcoultab = TRUE;
2684 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2685 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2686 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2687 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2689 if (fr->rcoulomb != fr->rvdw)
2691 fr->bcoultab = TRUE;
2695 if (getenv("GMX_REQUIRE_TABLES"))
2698 fr->bcoultab = TRUE;
2703 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2704 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2707 if (fr->bvdwtab == TRUE)
2709 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2710 fr->nbkernel_vdw_modifier = eintmodNONE;
2712 if (fr->bcoultab == TRUE)
2714 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2715 fr->nbkernel_elec_modifier = eintmodNONE;
2719 if (ir->cutoff_scheme == ecutsVERLET)
2721 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2723 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2725 fr->bvdwtab = FALSE;
2726 fr->bcoultab = FALSE;
2729 /* Tables are used for direct ewald sum */
2732 if (EEL_PME(ir->coulombtype))
2736 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2738 if (ir->coulombtype == eelP3M_AD)
2740 please_cite(fp, "Hockney1988");
2741 please_cite(fp, "Ballenegger2012");
2745 please_cite(fp, "Essmann95a");
2748 if (ir->ewald_geometry == eewg3DC)
2752 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2754 please_cite(fp, "In-Chul99a");
2757 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2758 init_ewald_tab(&(fr->ewald_table), ir, fp);
2761 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2762 1/fr->ewaldcoeff_q);
2766 if (EVDW_PME(ir->vdwtype))
2770 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2772 please_cite(fp, "Essmann95a");
2773 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2776 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2777 1/fr->ewaldcoeff_lj);
2781 /* Electrostatics */
2782 fr->epsilon_r = ir->epsilon_r;
2783 fr->epsilon_rf = ir->epsilon_rf;
2784 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2785 fr->rcoulomb_switch = ir->rcoulomb_switch;
2786 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2788 /* Parameters for generalized RF */
2792 if (fr->eeltype == eelGRF)
2794 init_generalized_rf(fp, mtop, ir, fr);
2797 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2798 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2799 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2800 IR_ELEC_FIELD(*ir) ||
2801 (fr->adress_icor != eAdressICOff)
2804 if (fr->cutoff_scheme == ecutsGROUP &&
2805 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2807 /* Count the total number of charge groups */
2808 fr->cg_nalloc = ncg_mtop(mtop);
2809 srenew(fr->cg_cm, fr->cg_nalloc);
2811 if (fr->shift_vec == NULL)
2813 snew(fr->shift_vec, SHIFTS);
2816 if (fr->fshift == NULL)
2818 snew(fr->fshift, SHIFTS);
2821 if (fr->nbfp == NULL)
2823 fr->ntype = mtop->ffparams.atnr;
2824 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2825 if (EVDW_PME(fr->vdwtype))
2827 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2831 /* Copy the energy group exclusions */
2832 fr->egp_flags = ir->opts.egp_flags;
2834 /* Van der Waals stuff */
2835 fr->rvdw = cutoff_inf(ir->rvdw);
2836 fr->rvdw_switch = ir->rvdw_switch;
2837 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2839 if (fr->rvdw_switch >= fr->rvdw)
2841 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2842 fr->rvdw_switch, fr->rvdw);
2846 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2847 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2848 fr->rvdw_switch, fr->rvdw);
2852 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2854 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2857 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2859 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2864 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2865 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2868 fr->eDispCorr = ir->eDispCorr;
2869 if (ir->eDispCorr != edispcNO)
2871 set_avcsixtwelve(fp, fr, mtop);
2876 set_bham_b_max(fp, fr, mtop);
2879 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2881 /* Copy the GBSA data (radius, volume and surftens for each
2882 * atomtype) from the topology atomtype section to forcerec.
2884 snew(fr->atype_radius, fr->ntype);
2885 snew(fr->atype_vol, fr->ntype);
2886 snew(fr->atype_surftens, fr->ntype);
2887 snew(fr->atype_gb_radius, fr->ntype);
2888 snew(fr->atype_S_hct, fr->ntype);
2890 if (mtop->atomtypes.nr > 0)
2892 for (i = 0; i < fr->ntype; i++)
2894 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2896 for (i = 0; i < fr->ntype; i++)
2898 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2900 for (i = 0; i < fr->ntype; i++)
2902 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2904 for (i = 0; i < fr->ntype; i++)
2906 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2908 for (i = 0; i < fr->ntype; i++)
2910 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2914 /* Generate the GB table if needed */
2918 fr->gbtabscale = 2000;
2920 fr->gbtabscale = 500;
2924 fr->gbtab = make_gb_table(oenv, fr);
2926 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2928 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2929 if (!DOMAINDECOMP(cr))
2931 make_local_gb(cr, fr->born, ir->gb_algorithm);
2935 /* Set the charge scaling */
2936 if (fr->epsilon_r != 0)
2938 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2942 /* eps = 0 is infinite dieletric: no coulomb interactions */
2946 /* Reaction field constants */
2947 if (EEL_RF(fr->eeltype))
2949 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2950 fr->rcoulomb, fr->temp, fr->zsquare, box,
2951 &fr->kappa, &fr->k_rf, &fr->c_rf);
2954 /*This now calculates sum for q and c6*/
2955 set_chargesum(fp, fr, mtop);
2957 /* if we are using LR electrostatics, and they are tabulated,
2958 * the tables will contain modified coulomb interactions.
2959 * Since we want to use the non-shifted ones for 1-4
2960 * coulombic interactions, we must have an extra set of tables.
2963 /* Construct tables.
2964 * A little unnecessary to make both vdw and coul tables sometimes,
2965 * but what the heck... */
2967 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2968 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2970 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2971 fr->bBHAM || fr->bEwald) &&
2972 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2973 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2974 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2976 negp_pp = ir->opts.ngener - ir->nwall;
2980 bSomeNormalNbListsAreInUse = TRUE;
2985 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2986 for (egi = 0; egi < negp_pp; egi++)
2988 for (egj = egi; egj < negp_pp; egj++)
2990 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2991 if (!(egp_flags & EGP_EXCL))
2993 if (egp_flags & EGP_TABLE)
2999 bSomeNormalNbListsAreInUse = TRUE;
3004 if (bSomeNormalNbListsAreInUse)
3006 fr->nnblists = negptable + 1;
3010 fr->nnblists = negptable;
3012 if (fr->nnblists > 1)
3014 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3023 snew(fr->nblists, fr->nnblists);
3025 /* This code automatically gives table length tabext without cut-off's,
3026 * in that case grompp should already have checked that we do not need
3027 * normal tables and we only generate tables for 1-4 interactions.
3029 rtab = ir->rlistlong + ir->tabext;
3033 /* make tables for ordinary interactions */
3034 if (bSomeNormalNbListsAreInUse)
3036 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3039 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3041 if (!bMakeSeparate14Table)
3043 fr->tab14 = fr->nblists[0].table_elec_vdw;
3053 /* Read the special tables for certain energy group pairs */
3054 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3055 for (egi = 0; egi < negp_pp; egi++)
3057 for (egj = egi; egj < negp_pp; egj++)
3059 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3060 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3062 nbl = &(fr->nblists[m]);
3063 if (fr->nnblists > 1)
3065 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3067 /* Read the table file with the two energy groups names appended */
3068 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3069 *mtop->groups.grpname[nm_ind[egi]],
3070 *mtop->groups.grpname[nm_ind[egj]],
3074 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3075 *mtop->groups.grpname[nm_ind[egi]],
3076 *mtop->groups.grpname[nm_ind[egj]],
3077 &fr->nblists[fr->nnblists/2+m]);
3081 else if (fr->nnblists > 1)
3083 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3089 if (bMakeSeparate14Table)
3091 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3092 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3093 GMX_MAKETABLES_14ONLY);
3096 /* Read AdResS Thermo Force table if needed */
3097 if (fr->adress_icor == eAdressICThermoForce)
3099 /* old todo replace */
3101 if (ir->adress->n_tf_grps > 0)
3103 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3108 /* load the default table */
3109 snew(fr->atf_tabs, 1);
3110 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3115 fr->nwall = ir->nwall;
3116 if (ir->nwall && ir->wall_type == ewtTABLE)
3118 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3123 fcd->bondtab = make_bonded_tables(fp,
3124 F_TABBONDS, F_TABBONDSNC,
3126 fcd->angletab = make_bonded_tables(fp,
3129 fcd->dihtab = make_bonded_tables(fp,
3137 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3141 /* QM/MM initialization if requested
3145 fprintf(stderr, "QM/MM calculation requested.\n");
3148 fr->bQMMM = ir->bQMMM;
3149 fr->qr = mk_QMMMrec();
3151 /* Set all the static charge group info */
3152 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3154 &fr->bExcl_IntraCGAll_InterCGNone);
3155 if (DOMAINDECOMP(cr))
3161 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3164 if (!DOMAINDECOMP(cr))
3166 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3167 mtop->natoms, mtop->natoms, mtop->natoms);
3170 fr->print_force = print_force;
3173 /* coarse load balancing vars */
3178 /* Initialize neighbor search */
3179 init_ns(fp, cr, &fr->ns, fr, mtop);
3181 if (cr->duty & DUTY_PP)
3183 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3187 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3192 /* Initialize the thread working data for bonded interactions */
3193 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3195 snew(fr->excl_load, fr->nthreads+1);
3197 if (fr->cutoff_scheme == ecutsVERLET)
3199 if (ir->rcoulomb != ir->rvdw)
3201 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3204 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3207 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3208 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3210 if (ir->eDispCorr != edispcNO)
3212 calc_enervirdiff(fp, ir->eDispCorr, fr);
3216 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3217 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3218 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3220 void pr_forcerec(FILE *fp, t_forcerec *fr)
3224 pr_real(fp, fr->rlist);
3225 pr_real(fp, fr->rcoulomb);
3226 pr_real(fp, fr->fudgeQQ);
3227 pr_bool(fp, fr->bGrid);
3228 pr_bool(fp, fr->bTwinRange);
3229 /*pr_int(fp,fr->cg0);
3230 pr_int(fp,fr->hcg);*/
3231 for (i = 0; i < fr->nnblists; i++)
3233 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3235 pr_real(fp, fr->rcoulomb_switch);
3236 pr_real(fp, fr->rcoulomb);
3241 void forcerec_set_excl_load(t_forcerec *fr,
3242 const gmx_localtop_t *top)
3245 int t, i, j, ntot, n, ntarget;
3247 ind = top->excls.index;
3251 for (i = 0; i < top->excls.nr; i++)
3253 for (j = ind[i]; j < ind[i+1]; j++)
3262 fr->excl_load[0] = 0;
3265 for (t = 1; t <= fr->nthreads; t++)
3267 ntarget = (ntot*t)/fr->nthreads;
3268 while (i < top->excls.nr && n < ntarget)
3270 for (j = ind[i]; j < ind[i+1]; j++)
3279 fr->excl_load[t] = i;