2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/legacyheaders/typedefs.h"
46 #include "gromacs/legacyheaders/types/commrec.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/math/utilities.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/macros.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/legacyheaders/force.h"
53 #include "gromacs/legacyheaders/tables.h"
54 #include "gromacs/legacyheaders/nonbonded.h"
55 #include "gromacs/legacyheaders/names.h"
56 #include "gromacs/legacyheaders/network.h"
57 #include "gromacs/legacyheaders/ns.h"
58 #include "gromacs/legacyheaders/txtdump.h"
59 #include "gromacs/legacyheaders/coulomb.h"
60 #include "gromacs/legacyheaders/md_support.h"
61 #include "gromacs/legacyheaders/md_logging.h"
62 #include "gromacs/legacyheaders/domdec.h"
63 #include "gromacs/legacyheaders/qmmm.h"
64 #include "gromacs/legacyheaders/copyrite.h"
65 #include "gromacs/topology/mtop_util.h"
66 #include "nbnxn_simd.h"
67 #include "nbnxn_search.h"
68 #include "nbnxn_atomdata.h"
69 #include "nbnxn_consts.h"
70 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
71 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
72 #include "gromacs/legacyheaders/inputrec.h"
74 #include "gromacs/pbcutil/ishift.h"
75 #include "gromacs/pbcutil/pbc.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
80 #include "gromacs/legacyheaders/gpu_utils.h"
81 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
82 #include "gromacs/legacyheaders/pmalloc_cuda.h"
83 #include "nb_verlet.h"
/* Allocate and return a new, zero-initialized t_forcerec.
 * NOTE(review): body not visible in this extraction — presumably a plain
 * snew() allocation; confirm against the full source. */
85 t_forcerec *mk_forcerec(void)
/* Debug helper: print the full atnr x atnr nonbonded parameter matrix to fp.
 * Prints Buckingham a/b/c when bBHAM is set, otherwise LJ c6/c12.
 * The stored values include the 6.0/12.0 derivative prefactors, so they
 * are divided back out before printing. */
95 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
99 for (i = 0; (i < atnr); i++)
101 for (j = 0; (j < atnr); j++)
103 fprintf(fp, "%2d - %2d", i, j);
106 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
107 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
111 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
112 C12(nbfp, atnr, i, j)/12.0);
/* Build the flat nonbonded parameter matrix from the force-field parameters.
 * Allocates 3*atnr*atnr reals for Buckingham (a, b, c) or 2*atnr*atnr for
 * LJ (c6, c12). The force-derivative prefactors (6.0 for c/c6, 12.0 for c12)
 * are folded into the stored values, as the comments below note.
 * Caller owns the returned array. */
119 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
127 snew(nbfp, 3*atnr*atnr);
128 for (i = k = 0; (i < atnr); i++)
130 for (j = 0; (j < atnr); j++, k++)
132 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
133 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
134 /* nbfp now includes the 6.0 derivative prefactor */
135 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
141 snew(nbfp, 2*atnr*atnr);
142 for (i = k = 0; (i < atnr); i++)
144 for (j = 0; (j < atnr); j++, k++)
146 /* nbfp now includes the 6.0/12.0 derivative prefactors */
147 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
148 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
/* Build the per-pair C6 table used by the LJ-PME reciprocal-space grid.
 * Diagonal (per-type) c6/c12 values are combined with the geometric rule,
 * or Lorentz-Berthelot when fr->ljpme_combination_rule == eljpmeLB and all
 * parameters are nonzero. Entries are laid out like C6 in nbfp and carry
 * the 6.0 derivative prefactor. Caller owns the returned array. */
156 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
159 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
162 /* For LJ-PME simulations, we correct the energies with the reciprocal space
163 * inside of the cut-off. To do this the non-bonded kernels need to have
164 * access to the C6-values used on the reciprocal grid in pme.c
168 snew(grid, 2*atnr*atnr);
169 for (i = k = 0; (i < atnr); i++)
171 for (j = 0; (j < atnr); j++, k++)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 if (fr->ljpme_combination_rule == eljpmeLB
179 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
181 sigmai = pow(c12i / c6i, 1.0/6.0);
182 sigmaj = pow(c12j / c6j, 1.0/6.0);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
/* LB: eps = c6^2/(4*c12) up to a constant factor, sigma = (c12/c6)^(1/6) */
185 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
187 /* Store the elements at the same relative positions as C6 in nbfp in order
188 * to simplify access in the kernels
190 grid[2*(atnr*i+j)] = c6*6.0;
/* Build an nbfp-style c6/c12 matrix from per-type parameters using the given
 * combination rule (geometric by default, Lorentz-Berthelot arithmetic when
 * comb_rule == eCOMB_ARITHMETIC and the geometric combinations are nonzero).
 * Stored values carry the 6.0/12.0 derivative prefactors, matching mk_nbfp.
 * Caller owns the returned array. */
196 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
200 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
204 snew(nbfp, 2*atnr*atnr);
205 for (i = 0; i < atnr; ++i)
207 for (j = 0; j < atnr; ++j)
209 c6i = idef->iparams[i*(atnr+1)].lj.c6;
210 c12i = idef->iparams[i*(atnr+1)].lj.c12;
211 c6j = idef->iparams[j*(atnr+1)].lj.c6;
212 c12j = idef->iparams[j*(atnr+1)].lj.c12;
213 c6 = sqrt(c6i * c6j);
214 c12 = sqrt(c12i * c12j);
215 if (comb_rule == eCOMB_ARITHMETIC
216 && !gmx_numzero(c6) && !gmx_numzero(c12))
218 sigmai = pow(c12i / c6i, 1.0/6.0);
219 sigmaj = pow(c12j / c6j, 1.0/6.0);
220 epsi = c6i * c6i / c12i;
221 epsj = c6j * c6j / c12j;
222 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
223 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
225 C6(nbfp, atnr, i, j) = c6*6.0;
226 C12(nbfp, atnr, i, j) = c12*12.0;
232 /* This routine sets fr->solvent_opt to the most common solvent in the
233 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
234 * the fr->solvent_type array with the correct type (or esolNO).
236 * Charge groups that fulfill the conditions but are not identical to the
237 * most common one will be marked as esolNO in the solvent_type array.
239 * TIP3p is identical to SPC for these purposes, so we call it
240 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
242 * NOTE: QM particle should not
243 * become an optimized solvent. Not even if there is only one charge
253 } solvent_parameters_t;
/* Classify one charge group as a potential optimized solvent (SPC-like with
 * 3 atoms or TIP4P-like with 4). On a match with a previously seen solvent
 * type, its count is incremented; otherwise a new entry may be appended to
 * *solvent_parameters_p (grown with srenew, caller frees). *cg_sp receives
 * the index of the matched/new solvent type. QM groups and perturbed (free
 * energy) atoms are never solvent-optimized. */
256 check_solvent_cg(const gmx_moltype_t *molt,
259 const unsigned char *qm_grpnr,
260 const t_grps *qm_grps,
262 int *n_solvent_parameters,
263 solvent_parameters_t **solvent_parameters_p,
267 const t_blocka *excl;
274 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
275 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
278 solvent_parameters_t *solvent_parameters;
280 /* We use a list with parameters for each solvent type.
281 * Every time we discover a new molecule that fulfills the basic
282 * conditions for a solvent we compare with the previous entries
283 * in these lists. If the parameters are the same we just increment
284 * the counter for that type, and otherwise we create a new type
285 * based on the current molecule.
287 * Once we've finished going through all molecules we check which
288 * solvent is most common, and mark all those molecules while we
289 * clear the flag on all others.
292 solvent_parameters = *solvent_parameters_p;
294 /* Mark the cg first as non optimized */
297 /* Check if this cg has no exclusions with atoms in other charge groups
298 * and all atoms inside the charge group excluded.
299 * We only have 3 or 4 atom solvent loops.
301 if (GET_CGINFO_EXCL_INTER(cginfo) ||
302 !GET_CGINFO_EXCL_INTRA(cginfo))
307 /* Get the indices of the first atom in this charge group */
308 j0 = molt->cgs.index[cg0];
309 j1 = molt->cgs.index[cg0+1];
311 /* Number of atoms in our molecule */
317 "Moltype '%s': there are %d atoms in this charge group\n",
321 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
324 if (nj < 3 || nj > 4)
329 /* Check if we are doing QM on this group */
331 if (qm_grpnr != NULL)
333 for (j = j0; j < j1 && !qm; j++)
335 qm = (qm_grpnr[j] < qm_grps->nr - 1);
338 /* Cannot use solvent optimization with QM */
344 atom = molt->atoms.atom;
346 /* Still looks like a solvent, time to check parameters */
348 /* If it is perturbed (free energy) we can't use the solvent loops,
349 * so then we just skip to the next molecule.
353 for (j = j0; j < j1 && !perturbed; j++)
355 perturbed = PERTURBED(atom[j]);
363 /* Now it's only a question if the VdW and charge parameters
364 * are OK. Before doing the check we compare and see if they are
365 * identical to a possible previous solvent type.
366 * First we assign the current types and charges.
368 for (j = 0; j < nj; j++)
370 tmp_vdwtype[j] = atom[j0+j].type;
371 tmp_charge[j] = atom[j0+j].q;
374 /* Does it match any previous solvent type? */
375 for (k = 0; k < *n_solvent_parameters; k++)
380 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
381 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
382 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
387 /* Check that types & charges match for all atoms in molecule */
388 for (j = 0; j < nj && match == TRUE; j++)
390 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
394 if (tmp_charge[j] != solvent_parameters[k].charge[j])
401 /* Congratulations! We have a matched solvent.
402 * Flag it with this type for later processing.
405 solvent_parameters[k].count += nmol;
407 /* We are done with this charge group */
412 /* If we get here, we have a tentative new solvent type.
413 * Before we add it we must check that it fulfills the requirements
414 * of the solvent optimized loops. First determine which atoms have
417 for (j = 0; j < nj; j++)
420 tjA = tmp_vdwtype[j];
422 /* Go through all other types and see if any have non-zero
423 * VdW parameters when combined with this one.
425 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
427 /* We already checked that the atoms weren't perturbed,
428 * so we only need to check state A now.
432 has_vdw[j] = (has_vdw[j] ||
433 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
434 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
435 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
440 has_vdw[j] = (has_vdw[j] ||
441 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
442 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
447 /* Now we know all we need to make the final check and assignment. */
451 * For this we require that all atoms have charge,
452 * the charges on atom 2 & 3 should be the same, and only
453 * atom 1 might have VdW.
455 if (has_vdw[1] == FALSE &&
456 has_vdw[2] == FALSE &&
457 tmp_charge[0] != 0 &&
458 tmp_charge[1] != 0 &&
459 tmp_charge[2] == tmp_charge[1])
461 srenew(solvent_parameters, *n_solvent_parameters+1);
462 solvent_parameters[*n_solvent_parameters].model = esolSPC;
463 solvent_parameters[*n_solvent_parameters].count = nmol;
464 for (k = 0; k < 3; k++)
466 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
467 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
470 *cg_sp = *n_solvent_parameters;
471 (*n_solvent_parameters)++;
476 /* Or could it be a TIP4P?
477 * For this we require that atoms 2,3,4 have charge, but not atom 1.
478 * Only atom 1 might have VdW.
480 if (has_vdw[1] == FALSE &&
481 has_vdw[2] == FALSE &&
482 has_vdw[3] == FALSE &&
483 tmp_charge[0] == 0 &&
484 tmp_charge[1] != 0 &&
485 tmp_charge[2] == tmp_charge[1] &&
488 srenew(solvent_parameters, *n_solvent_parameters+1);
489 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
490 solvent_parameters[*n_solvent_parameters].count = nmol;
491 for (k = 0; k < 4; k++)
493 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
494 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
497 *cg_sp = *n_solvent_parameters;
498 (*n_solvent_parameters)++;
502 *solvent_parameters_p = solvent_parameters;
/* Scan all charge groups in the topology with check_solvent_cg, pick the most
 * common solvent model, mark matching charge groups in cginfo_mb with it
 * (all others get esolNO), and store the winner in fr->solvent_opt.
 * Prints a summary to fp when an optimization is enabled. */
506 check_solvent(FILE * fp,
507 const gmx_mtop_t * mtop,
509 cginfo_mb_t *cginfo_mb)
512 const t_block * mols;
513 const gmx_moltype_t *molt;
514 int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
515 int n_solvent_parameters;
516 solvent_parameters_t *solvent_parameters;
522 fprintf(debug, "Going to determine what solvent types we have.\n");
527 n_solvent_parameters = 0;
528 solvent_parameters = NULL;
529 /* Allocate temporary array for solvent type */
530 snew(cg_sp, mtop->nmolblock);
534 for (mb = 0; mb < mtop->nmolblock; mb++)
536 molt = &mtop->moltype[mtop->molblock[mb].type];
538 /* Here we have to loop over all individual molecules
539 * because we need to check for QMMM particles.
541 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
542 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
543 nmol = mtop->molblock[mb].nmol/nmol_ch;
544 for (mol = 0; mol < nmol_ch; mol++)
547 am = mol*cgs->index[cgs->nr];
548 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
550 check_solvent_cg(molt, cg_mol, nmol,
551 mtop->groups.grpnr[egcQMMM] ?
552 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
553 &mtop->groups.grps[egcQMMM],
555 &n_solvent_parameters, &solvent_parameters,
556 cginfo_mb[mb].cginfo[cgm+cg_mol],
557 &cg_sp[mb][cgm+cg_mol]);
560 cg_offset += cgs->nr;
561 at_offset += cgs->index[cgs->nr];
564 /* Puh! We finished going through all charge groups.
565 * Now find the most common solvent model.
568 /* Most common solvent this far */
570 for (i = 0; i < n_solvent_parameters; i++)
573 solvent_parameters[i].count > solvent_parameters[bestsp].count)
581 bestsol = solvent_parameters[bestsp].model;
589 for (mb = 0; mb < mtop->nmolblock; mb++)
591 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
592 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
593 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
595 if (cg_sp[mb][i] == bestsp)
597 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
602 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
609 if (bestsol != esolNO && fp != NULL)
611 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
613 solvent_parameters[bestsp].count);
616 sfree(solvent_parameters);
617 fr->solvent_opt = bestsol;
621 acNONE = 0, acCONSTRAINT, acSETTLE
/* Build the per-molblock charge-group info array (cginfo_mb_t) for the whole
 * topology: for every charge group it records the energy group id, whether
 * atoms have VdW/charge, constraint/SETTLE involvement, free-energy
 * perturbation, intra/inter-cg exclusion status, and the cg size. Also runs
 * the solvent-optimization detection (check_solvent) unless bNoSolvOpt or
 * GMX_NO_SOLV_OPT disables it. Outputs *bFEP_NonBonded and
 * *bExcl_IntraCGAll_InterCGNone as global summaries. */
624 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
625 t_forcerec *fr, gmx_bool bNoSolvOpt,
626 gmx_bool *bFEP_NonBonded,
627 gmx_bool *bExcl_IntraCGAll_InterCGNone)
630 const t_blocka *excl;
631 const gmx_moltype_t *molt;
632 const gmx_molblock_t *molb;
633 cginfo_mb_t *cginfo_mb;
636 int cg_offset, a_offset, cgm, am;
637 int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
641 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
643 ncg_tot = ncg_mtop(mtop);
644 snew(cginfo_mb, mtop->nmolblock);
646 snew(type_VDW, fr->ntype);
647 for (ai = 0; ai < fr->ntype; ai++)
649 type_VDW[ai] = FALSE;
650 for (j = 0; j < fr->ntype; j++)
/* A type "has VdW" if any pairing with another type gives nonzero c6/c12 */
652 type_VDW[ai] = type_VDW[ai] ||
654 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
655 C12(fr->nbfp, fr->ntype, ai, j) != 0;
659 *bFEP_NonBonded = FALSE;
660 *bExcl_IntraCGAll_InterCGNone = TRUE;
663 snew(bExcl, excl_nalloc);
666 for (mb = 0; mb < mtop->nmolblock; mb++)
668 molb = &mtop->molblock[mb];
669 molt = &mtop->moltype[molb->type];
673 /* Check if the cginfo is identical for all molecules in this block.
674 * If so, we only need an array of the size of one molecule.
675 * Otherwise we make an array of #mol times #cgs per molecule.
679 for (m = 0; m < molb->nmol; m++)
681 am = m*cgs->index[cgs->nr];
682 for (cg = 0; cg < cgs->nr; cg++)
685 a1 = cgs->index[cg+1];
686 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
687 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
691 if (mtop->groups.grpnr[egcQMMM] != NULL)
693 for (ai = a0; ai < a1; ai++)
695 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
696 mtop->groups.grpnr[egcQMMM][a_offset +ai])
705 cginfo_mb[mb].cg_start = cg_offset;
706 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
707 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
708 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
709 cginfo = cginfo_mb[mb].cginfo;
711 /* Set constraints flags for constrained atoms */
712 snew(a_con, molt->atoms.nr);
713 for (ftype = 0; ftype < F_NRE; ftype++)
715 if (interaction_function[ftype].flags & IF_CONSTRAINT)
720 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
724 for (a = 0; a < nral; a++)
726 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
727 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
733 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
736 am = m*cgs->index[cgs->nr];
737 for (cg = 0; cg < cgs->nr; cg++)
740 a1 = cgs->index[cg+1];
742 /* Store the energy group in cginfo */
743 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
744 SET_CGINFO_GID(cginfo[cgm+cg], gid);
746 /* Check the intra/inter charge group exclusions */
747 if (a1-a0 > excl_nalloc)
749 excl_nalloc = a1 - a0;
750 srenew(bExcl, excl_nalloc);
752 /* bExclIntraAll: all intra cg interactions excluded
753 * bExclInter: any inter cg interactions excluded
755 bExclIntraAll = TRUE;
759 bHavePerturbedAtoms = FALSE;
760 for (ai = a0; ai < a1; ai++)
762 /* Check VDW and electrostatic interactions */
763 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
764 type_VDW[molt->atoms.atom[ai].typeB]);
765 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
766 molt->atoms.atom[ai].qB != 0);
768 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
770 /* Clear the exclusion list for atom ai */
771 for (aj = a0; aj < a1; aj++)
773 bExcl[aj-a0] = FALSE;
775 /* Loop over all the exclusions of atom ai */
776 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
779 if (aj < a0 || aj >= a1)
788 /* Check if ai excludes a0 to a1 */
789 for (aj = a0; aj < a1; aj++)
793 bExclIntraAll = FALSE;
800 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
803 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
811 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
815 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
817 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
819 /* The size in cginfo is currently only read with DD */
820 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
824 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
828 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
830 if (bHavePerturbedAtoms && fr->efep != efepNO)
832 SET_CGINFO_FEP(cginfo[cgm+cg]);
833 *bFEP_NonBonded = TRUE;
835 /* Store the charge group size */
836 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
838 if (!bExclIntraAll || bExclInter)
840 *bExcl_IntraCGAll_InterCGNone = FALSE;
847 cg_offset += molb->nmol*cgs->nr;
848 a_offset += molb->nmol*cgs->index[cgs->nr];
852 /* the solvent optimizer is called after the QM is initialized,
853 * because we don't want to have the QM subsystem to become an
857 check_solvent(fplog, mtop, fr, cginfo_mb);
859 if (getenv("GMX_NO_SOLV_OPT"))
863 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
864 "Disabling all solvent optimization\n");
866 fr->solvent_opt = esolNO;
870 fr->solvent_opt = esolNO;
872 if (!fr->solvent_opt)
874 for (mb = 0; mb < mtop->nmolblock; mb++)
876 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
878 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
/* Expand the compressed per-molblock cginfo into one flat array with an
 * entry per global charge group; the modulo handles blocks where one
 * molecule's info is shared by all molecules (cg_mod < cg_end-cg_start).
 * Caller owns the returned array. */
886 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
891 ncg = cgi_mb[nmb-1].cg_end;
894 for (cg = 0; cg < ncg; cg++)
896 while (cg >= cgi_mb[mb].cg_end)
901 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
/* Compute system totals of charge (qsum), squared charge (q2sum) and C6
 * (c6sum) for topology state A, and for state B when free energy is on
 * (otherwise B copies A). Results are stored in fr->qsum/q2sum/c6sum[0..1]
 * and the total charge is reported to log. */
907 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
909 /*This now calculates sum for q and c6*/
910 double qsum, q2sum, q, c6sum, c6;
912 const t_atoms *atoms;
917 for (mb = 0; mb < mtop->nmolblock; mb++)
919 nmol = mtop->molblock[mb].nmol;
920 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
921 for (i = 0; i < atoms->nr; i++)
923 q = atoms->atom[i].q;
/* Diagonal iparams entry: type's self-interaction c6 */
926 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
931 fr->q2sum[0] = q2sum;
932 fr->c6sum[0] = c6sum;
934 if (fr->efep != efepNO)
939 for (mb = 0; mb < mtop->nmolblock; mb++)
941 nmol = mtop->molblock[mb].nmol;
942 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
943 for (i = 0; i < atoms->nr; i++)
945 q = atoms->atom[i].qB;
948 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
952 fr->q2sum[1] = q2sum;
953 fr->c6sum[1] = c6sum;
958 fr->qsum[1] = fr->qsum[0];
959 fr->q2sum[1] = fr->q2sum[0];
960 fr->c6sum[1] = fr->c6sum[0];
964 if (fr->efep == efepNO)
966 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
970 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
971 fr->qsum[0], fr->qsum[1]);
/* Refresh box-dependent forcerec parameters: for generalized reaction-field
 * electrostatics, recompute kappa/k_rf/c_rf from the (possibly changed) box. */
976 void update_forcerec(t_forcerec *fr, matrix box)
978 if (fr->eeltype == eelGRF)
980 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
981 fr->rcoulomb, fr->temp, fr->zsquare, box,
982 &fr->kappa, &fr->k_rf, &fr->c_rf);
/* Compute the system-average C6 and C12 parameters (fr->avcsix/avctwelve)
 * used for long-range dispersion corrections, for state A and B (with FEP).
 * Counts pairs by atom type to avoid O(natoms^2) work, subtracts excluded
 * pairs, handles test-particle insertion (n_tpi) specially, and for LJ-PME
 * corrects for the difference between real C6 and combination-rule C6. */
986 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
988 const t_atoms *atoms, *atoms_tpi;
989 const t_blocka *excl;
990 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
991 gmx_int64_t npair, npair_ij, tmpi, tmpj;
992 double csix, ctwelve;
996 real *nbfp_comb = NULL;
1002 /* For LJ-PME, we want to correct for the difference between the
1003 * actual C6 values and the C6 values used by the LJ-PME based on
1004 * combination rules. */
1006 if (EVDW_PME(fr->vdwtype))
1008 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1009 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1010 for (tpi = 0; tpi < ntp; ++tpi)
1012 for (tpj = 0; tpj < ntp; ++tpj)
1014 C6(nbfp_comb, ntp, tpi, tpj) =
1015 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1016 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1021 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1029 /* Count the types so we avoid natoms^2 operations */
1030 snew(typecount, ntp);
1031 gmx_mtop_count_atomtypes(mtop, q, typecount);
1033 for (tpi = 0; tpi < ntp; tpi++)
1035 for (tpj = tpi; tpj < ntp; tpj++)
1037 tmpi = typecount[tpi];
1038 tmpj = typecount[tpj];
1041 npair_ij = tmpi*tmpj;
1045 npair_ij = tmpi*(tmpi - 1)/2;
1049 /* nbfp now includes the 6.0 derivative prefactor */
1050 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1054 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1055 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1056 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1062 /* Subtract the excluded pairs.
1063 * The main reason for subtracting exclusions is that in some cases
1064 * some combinations might never occur and the parameters could have
1065 * any value. These unused values should not influence the dispersion
1068 for (mb = 0; mb < mtop->nmolblock; mb++)
1070 nmol = mtop->molblock[mb].nmol;
1071 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1072 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1073 for (i = 0; (i < atoms->nr); i++)
1077 tpi = atoms->atom[i].type;
1081 tpi = atoms->atom[i].typeB;
1083 j1 = excl->index[i];
1084 j2 = excl->index[i+1];
1085 for (j = j1; j < j2; j++)
1092 tpj = atoms->atom[k].type;
1096 tpj = atoms->atom[k].typeB;
1100 /* nbfp now includes the 6.0 derivative prefactor */
1101 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1105 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1106 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1107 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1117 /* Only correct for the interaction of the test particle
1118 * with the rest of the system.
1121 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1124 for (mb = 0; mb < mtop->nmolblock; mb++)
1126 nmol = mtop->molblock[mb].nmol;
1127 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1128 for (j = 0; j < atoms->nr; j++)
1131 /* Remove the interaction of the test charge group
1134 if (mb == mtop->nmolblock-1)
1138 if (mb == 0 && nmol == 1)
1140 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1145 tpj = atoms->atom[j].type;
1149 tpj = atoms->atom[j].typeB;
1151 for (i = 0; i < fr->n_tpi; i++)
1155 tpi = atoms_tpi->atom[i].type;
1159 tpi = atoms_tpi->atom[i].typeB;
1163 /* nbfp now includes the 6.0 derivative prefactor */
1164 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1168 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1169 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1170 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1177 if (npair - nexcl <= 0 && fplog)
1179 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1185 csix /= npair - nexcl;
1186 ctwelve /= npair - nexcl;
1190 fprintf(debug, "Counted %d exclusions\n", nexcl);
1191 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1192 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1194 fr->avcsix[q] = csix;
1195 fr->avctwelve[q] = ctwelve;
1198 if (EVDW_PME(fr->vdwtype))
1205 if (fr->eDispCorr == edispcAllEner ||
1206 fr->eDispCorr == edispcAllEnerPres)
1208 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1209 fr->avcsix[0], fr->avctwelve[0]);
1213 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
/* Scan all atom-type pairs over all molecule types and record the largest
 * Buckingham b parameter in fr->bham_b_max (needed to size the exponential
 * table); also tracks and reports the minimum. Fatal error on an atom type
 * index outside the parameter matrix. */
1219 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1220 const gmx_mtop_t *mtop)
1222 const t_atoms *at1, *at2;
1223 int mt1, mt2, i, j, tpi, tpj, ntypes;
1229 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1236 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1238 at1 = &mtop->moltype[mt1].atoms;
1239 for (i = 0; (i < at1->nr); i++)
1241 tpi = at1->atom[i].type;
1244 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1247 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1249 at2 = &mtop->moltype[mt2].atoms;
1250 for (j = 0; (j < at2->nr); j++)
1252 tpj = at2->atom[j].type;
1255 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1257 b = BHAMB(nbfp, ntypes, tpi, tpj);
1258 if (b > fr->bham_b_max)
1262 if ((b < bmin) || (bmin == -1))
1272 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1273 bmin, fr->bham_b_max);
/* Read/construct the combined elec+VdW interaction table for an energy-group
 * pair (table file name gets "_eg1_eg2" appended before the extension), then
 * split it into separate 4-stride electrostatics and 8-stride VdW tables for
 * better cache behavior. The combined table stores 12 values per point:
 * 4 for Coulomb followed by 8 for dispersion+repulsion. */
1277 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1278 t_forcerec *fr, real rtab,
1279 const t_commrec *cr,
1280 const char *tabfn, char *eg1, char *eg2,
1290 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1295 sprintf(buf, "%s", tabfn);
1298 /* Append the two energy group names */
1299 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1300 eg1, eg2, ftp2ext(efXVG));
1302 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1303 /* Copy the contents of the table to separate coulomb and LJ tables too,
1304 * to improve cache performance.
1306 /* For performance reasons we want
1307 * the table data to be aligned to 16-byte. The pointers could be freed
1308 * but currently aren't.
1310 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1311 nbl->table_elec.format = nbl->table_elec_vdw.format;
1312 nbl->table_elec.r = nbl->table_elec_vdw.r;
1313 nbl->table_elec.n = nbl->table_elec_vdw.n;
1314 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1315 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1316 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1317 nbl->table_elec.ninteractions = 1;
1318 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1319 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1321 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1322 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1323 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1324 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1325 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1326 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1327 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1328 nbl->table_vdw.ninteractions = 2;
1329 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1330 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1332 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1334 for (j = 0; j < 4; j++)
1336 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1338 for (j = 0; j < 8; j++)
1340 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
/* Count how many interactions of the two given bonded function types use
 * each user table number across all molecule types. On return *ncount is
 * one past the highest table number seen and (*count)[tabnr] is the usage
 * count; *count is (re)allocated as needed. A negative table number is a
 * fatal error. */
1345 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1346 int *ncount, int **count)
1348 const gmx_moltype_t *molt;
1350 int mt, ftype, stride, i, j, tabnr;
1352 for (mt = 0; mt < mtop->nmoltype; mt++)
1354 molt = &mtop->moltype[mt];
1355 for (ftype = 0; ftype < F_NRE; ftype++)
1357 if (ftype == ftype1 || ftype == ftype2)
1359 il = &molt->ilist[ftype];
1360 stride = 1 + NRAL(ftype);
1361 for (i = 0; i < il->nr; i += stride)
1363 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1366 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1368 if (tabnr >= *ncount)
1370 srenew(*count, tabnr+1);
1371 for (j = *ncount; j < tabnr+1; j++)
/* Read one bonded interaction table per table number used by ftype1/ftype2
 * (as found by count_tables). File names are derived from basefn by
 * replacing the extension with "_<tabext><i>.xvg". Returns the table array
 * (caller owns). */
1384 static bondedtable_t *make_bonded_tables(FILE *fplog,
1385 int ftype1, int ftype2,
1386 const gmx_mtop_t *mtop,
1387 const char *basefn, const char *tabext)
1389 int i, ncount, *count;
1397 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1402 for (i = 0; i < ncount; i++)
1406 sprintf(tabfn, "%s", basefn);
1407 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1408 tabext, i, ftp2ext(efXVG));
1409 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
/* Store the charge-group and atom ranges in the forcerec and (re)allocate
 * the force buffers (f_twin, f_novirsum_alloc) when the constrained-atom
 * count exceeds the current allocation. */
1418 void forcerec_set_ranges(t_forcerec *fr,
1419 int ncg_home, int ncg_force,
1421 int natoms_force_constr, int natoms_f_novirsum)
1426 /* fr->ncg_force is unused in the standard code,
1427 * but it can be useful for modified code dealing with charge groups.
1429 fr->ncg_force = ncg_force;
1430 fr->natoms_force = natoms_force;
1431 fr->natoms_force_constr = natoms_force_constr;
1433 if (fr->natoms_force_constr > fr->nalloc_force)
1435 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1439 srenew(fr->f_twin, fr->nalloc_force);
1443 if (fr->bF_NoVirSum)
1445 fr->f_novirsum_n = natoms_f_novirsum;
1446 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1448 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1449 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1454 fr->f_novirsum_n = 0;
/* Map a cutoff of 0 (meaning "no cutoff") to GMX_CUTOFF_INF so downstream
 * comparisons can treat it as effectively infinite. */
1458 static real cutoff_inf(real cutoff)
1462 cutoff = GMX_CUTOFF_INF;
/* Load one AdResS thermodynamic-force table per TF energy group into
 * fr->atf_tabs. File names are derived from tabfn by replacing the
 * extension with "tf_<groupname>.xvg". Fatal error when no table file
 * was supplied. */
1468 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1469 t_forcerec *fr, const t_inputrec *ir,
1470 const char *tabfn, const gmx_mtop_t *mtop,
1478 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1482 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1484 sprintf(buf, "%s", tabfn);
1485 for (i = 0; i < ir->adress->n_tf_grps; i++)
1487 j = ir->adress->tf_table_index[i]; /* get energy group index */
1488 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1489 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1492 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1494 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
/* Decide whether the all-vs-all force loops can be used for this run.
 * They require zero (infinite) cut-offs, no PBC, plain cut-off
 * Coulomb/VdW, no free energy, and at most the supported GB algorithms;
 * GMX_NO_ALLVSALL overrides the decision.
 * NOTE(review): line-sampled excerpt; the `bAllvsAll =` assignment head,
 * braces and the final `return` are not visible here. */
1499 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1506 ir->rcoulomb == 0 &&
1508 ir->ePBC == epbcNONE &&
1509 ir->vdwtype == evdwCUT &&
1510 ir->coulombtype == eelCUT &&
1511 ir->efep == efepNO &&
1512 (ir->implicit_solvent == eisNO ||
1513 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1514 ir->gb_algorithm == egbHCT ||
1515 ir->gb_algorithm == egbOBC))) &&
1516 getenv("GMX_NO_ALLVSALL") == NULL
/* Multiple energy-monitor groups rule the all-vs-all loops out; warn
 * the user since this has a performance impact. */
1519 if (bAllvsAll && ir->opts.ngener > 1)
1521 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1527 fprintf(stderr, "\n%s\n", note);
1531 fprintf(fp, "\n%s\n", note);
1537 if (bAllvsAll && fp && MASTER(cr))
1539 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
/* Allocate per-thread force/shift-force/energy accumulation buffers for
 * OpenMP-parallel bonded interactions. Only threads 1..nthreads-1 get
 * private buffers; thread 0 writes directly into the global arrays. */
1546 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1550 /* These thread local data structures are used for bondeds only */
1551 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1553 if (fr->nthreads > 1)
1555 snew(fr->f_t, fr->nthreads);
1556 /* Thread 0 uses the global force and energy arrays */
1557 for (t = 1; t < fr->nthreads; t++)
/* Force buffer is allocated lazily (on first use), hence NULL/0 here */
1559 fr->f_t[t].f = NULL;
1560 fr->f_t[t].f_nalloc = 0;
1561 snew(fr->f_t[t].fshift, SHIFTS);
/* One energy accumulator per (group i, group j) pair */
1562 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1563 for (i = 0; i < egNR; i++)
1565 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
/* Report whether the accelerated (SIMD/GPU) nbnxn kernels support the
 * requested interaction setup; warns and (on lines not visible in this
 * excerpt) presumably returns FALSE for the unsupported combination.
 * NOTE(review): since the branch is guarded by !bGPU, the two
 * `bGPU ? … : …` ternaries below always select their second operand
 * ("SIMD kernels" / "plain-C kernels") — the bGPU alternatives look like
 * dead text kept for symmetry; confirm before simplifying. */
1572 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1573 const t_commrec *cr,
1574 const t_inputrec *ir,
/* LJ-PME with Lorentz-Berthelot combination is only implemented on GPU */
1577 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1579 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1580 bGPU ? "GPUs" : "SIMD kernels",
1581 bGPU ? "CPU only" : "plain-C kernels");
/* Choose the CPU nbnxn kernel flavour (plain C, SIMD 4xN or SIMD
 * 2x(N+N)) and the Ewald exclusion-correction method (table vs
 * analytical), based on compile-time SIMD support, the interaction
 * setup and environment-variable overrides.
 * NOTE(review): line-sampled excerpt; several #else/#endif lines and
 * braces are not visible, so the exact preprocessor nesting must be
 * checked against the full source. */
1589 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
/* Safe defaults: reference C kernel with tabulated Ewald correction */
1593 *kernel_type = nbnxnk4x4_PlainC;
1594 *ewald_excl = ewaldexclTable;
1596 #ifdef GMX_NBNXN_SIMD
1598 #ifdef GMX_NBNXN_SIMD_4XN
1599 *kernel_type = nbnxnk4xN_SIMD_4xN;
1601 #ifdef GMX_NBNXN_SIMD_2XNN
1602 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1605 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1606 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1607 * Currently this is based on the SIMD acceleration choice,
1608 * but it might be better to decide this at runtime based on CPU.
1610 * 4xN calculates more (zero) interactions, but has less pair-search
1611 * work and much better kernel instruction scheduling.
1613 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1614 * which doesn't have FMA, both the analytical and tabulated Ewald
1615 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1616 * 2x(4+4) because it results in significantly fewer pairs.
1617 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1618 * 10% with HT, 50% without HT. As we currently don't detect the actual
1619 * use of HT, use 4x8 to avoid a potential performance hit.
1620 * On Intel Haswell 4x8 is always faster.
1622 *kernel_type = nbnxnk4xN_SIMD_4xN;
1624 #ifndef GMX_SIMD_HAVE_FMA
1625 if (EEL_PME_EWALD(ir->coulombtype) ||
1626 EVDW_PME(ir->vdwtype))
1628 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1629 * There are enough instructions to make 2x(4+4) efficient.
1631 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1634 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
/* Environment-variable overrides; fatal if the requested kernel layout
 * was not compiled in. */
1637 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1639 #ifdef GMX_NBNXN_SIMD_4XN
1640 *kernel_type = nbnxnk4xN_SIMD_4xN;
1642 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1645 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1647 #ifdef GMX_NBNXN_SIMD_2XNN
1648 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1650 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1654 /* Analytical Ewald exclusion correction is only an option in
1656 * Since table lookup's don't parallelize with SIMD, analytical
1657 * will probably always be faster for a SIMD width of 8 or more.
1658 * With FMA analytical is sometimes faster for a width if 4 as well.
1659 * On BlueGene/Q, this is faster regardless of precision.
1660 * In single precision, this is faster on Bulldozer.
1662 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1663 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1664 defined GMX_SIMD_IBM_QPX
1665 *ewald_excl = ewaldexclAnalytical;
/* Environment overrides for the Ewald exclusion method */
1667 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1669 *ewald_excl = ewaldexclTable;
1671 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1673 *ewald_excl = ewaldexclAnalytical;
1677 #endif /* GMX_NBNXN_SIMD */
/* Return a human-readable name for an nbnxn kernel type, used in log
 * output. For the SIMD kernel types the name reflects the SIMD
 * instruction set selected at compile time.
 * NOTE(review): line-sampled excerpt; case labels for "not set",
 * several break statements and the final return are not visible. */
1681 const char *lookup_nbnxn_kernel_name(int kernel_type)
1683 const char *returnvalue = NULL;
1684 switch (kernel_type)
1687 returnvalue = "not set";
1689 case nbnxnk4x4_PlainC:
1690 returnvalue = "plain C";
1692 case nbnxnk4xN_SIMD_4xN:
1693 case nbnxnk4xN_SIMD_2xNN:
1694 #ifdef GMX_NBNXN_SIMD
/* Pick the most specific known x86 SIMD flavour; generic "SIMD" is the
 * fallback for other architectures. */
1695 #if defined GMX_SIMD_X86_SSE2
1696 returnvalue = "SSE2";
1697 #elif defined GMX_SIMD_X86_SSE4_1
1698 returnvalue = "SSE4.1";
1699 #elif defined GMX_SIMD_X86_AVX_128_FMA
1700 returnvalue = "AVX_128_FMA";
1701 #elif defined GMX_SIMD_X86_AVX_256
1702 returnvalue = "AVX_256";
1703 #elif defined GMX_SIMD_X86_AVX2_256
1704 returnvalue = "AVX2_256";
1706 returnvalue = "SIMD";
1708 #else /* GMX_NBNXN_SIMD */
1709 returnvalue = "not available";
1710 #endif /* GMX_NBNXN_SIMD */
1712 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1713 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
/* default case (label not visible): unknown value is a programming error */
1717 gmx_fatal(FARGS, "Illegal kernel type selected");
/* Select the nbnxn kernel type for this run: GPU (real or emulated on
 * the CPU), SIMD CPU, or the plain-C reference kernel as a fallback.
 * Also logs the choice and warns when a slow reference kernel ends up
 * being used.
 * NOTE(review): line-sampled excerpt; braces and some conditions (e.g.
 * the bUseGPU check before the CUDA branch) are not visible here. */
1724 static void pick_nbnxn_kernel(FILE *fp,
1725 const t_commrec *cr,
1726 gmx_bool use_simd_kernels,
1728 gmx_bool bEmulateGPU,
1729 const t_inputrec *ir,
1732 gmx_bool bDoNonbonded)
1734 assert(kernel_type);
1736 *kernel_type = nbnxnkNotSet;
1737 *ewald_excl = ewaldexclTable;
/* GPU emulation runs the GPU-layout pair lists with the plain-C kernel */
1741 *kernel_type = nbnxnk8x8x8_PlainC;
1745 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1750 *kernel_type = nbnxnk8x8x8_CUDA;
1753 if (*kernel_type == nbnxnkNotSet)
1755 /* LJ PME with LB combination rule does 7 mesh operations.
1756 * This so slow that we don't compile SIMD non-bonded kernels for that.
1758 if (use_simd_kernels &&
1759 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1761 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
/* else (not visible): fall back to the reference C kernel */
1765 *kernel_type = nbnxnk4x4_PlainC;
1769 if (bDoNonbonded && fp != NULL)
1771 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1772 lookup_nbnxn_kernel_name(*kernel_type),
1773 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1774 nbnxn_kernel_to_cj_size(*kernel_type));
1776 if (nbnxnk4x4_PlainC == *kernel_type ||
1777 nbnxnk8x8x8_PlainC == *kernel_type)
1779 md_print_warn(cr, fp,
1780 "WARNING: Using the slow %s kernels. This should\n"
1781 "not happen during routine usage on supported platforms.\n\n",
1782 lookup_nbnxn_kernel_name(*kernel_type));
/* Decide between real GPU use and GPU emulation, and initialize the
 * GPU assigned to this PP rank when hardware GPU acceleration is used.
 * NOTE(review): line-sampled excerpt; braces and some output-parameter
 * declarations (e.g. the bUseGPU flag set at the end) are not visible. */
1787 static void pick_nbnxn_resources(const t_commrec *cr,
1788 const gmx_hw_info_t *hwinfo,
1789 gmx_bool bDoNonbonded,
1791 gmx_bool *bEmulateGPU,
1792 const gmx_gpu_opt_t *gpu_opt)
1794 gmx_bool bEmulateGPUEnvVarSet;
1795 char gpu_err_str[STRLEN];
1799 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1801 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1802 * GPUs (currently) only handle non-bonded calculations, we will
1803 * automatically switch to emulation if non-bonded calculations are
1804 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1805 * way to turn off GPU initialization, data movement, and cleanup.
1807 * GPU emulation can be useful to assess the performance one can expect by
1808 * adding GPU(s) to the machine. The conditional below allows this even
1809 * if mdrun is compiled without GPU acceleration support.
1810 * Note that you should freeze the system, as otherwise it will explode.
1812 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1814 gpu_opt->ncuda_dev_use > 0));
1816 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1818 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1820 /* Each PP node will use the intra-node id-th device from the
1821 * list of detected/selected GPUs. */
1822 if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1823 &hwinfo->gpu_info, gpu_opt))
1825 /* At this point the init should never fail as we made sure that
1826 * we have all the GPUs we need. If it still does, we'll bail. */
1827 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1829 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1830 cr->rank_pp_intranode),
1834 /* Here we actually turn on hardware GPU acceleration */
/* Return whether "simple" (FDV0-layout) lookup tables are used for the
 * given cut-off scheme. With the group scheme this is always TRUE; with
 * the Verlet scheme it depends on the selected nbnxn kernel type.
 * NOTE(review): case labels (ecutsGROUP/ecutsVERLET presumably) and the
 * default label fall on lines not visible in this excerpt. */
1839 gmx_bool uses_simple_tables(int cutoff_scheme,
1840 nonbonded_verlet_t *nbv,
1843 gmx_bool bUsesSimpleTables = TRUE;
1846 switch (cutoff_scheme)
1849 bUsesSimpleTables = TRUE;
1852 assert(NULL != nbv && NULL != nbv->grp);
/* A negative group index means "local"; otherwise probe the last group
 * (the non-local one when domain decomposition is active). */
1853 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1854 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1857 gmx_incons("unimplemented");
1859 return bUsesSimpleTables;
/* (Re)build the Ewald correction force/potential tables for Coulomb
 * and, with LJ-PME, for Van der Waals. The table size/spacing differs
 * between the CPU ("simple" tables) and GPU paths. */
1862 static void init_ewald_f_table(interaction_const_t *ic,
1863 gmx_bool bUsesSimpleTables,
1868 if (bUsesSimpleTables)
1870 /* Get the Ewald table spacing based on Coulomb and/or LJ
1871 * Ewald coefficients and rtol.
1873 ic->tabq_scale = ewald_spline3_table_scale(ic);
/* Table must reach to the longer of rtab and rcoulomb; +2 points of
 * margin for interpolation at the end. */
1875 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1876 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
/* GPU path (else-branch braces not visible): fixed table size */
1880 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1881 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1882 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
/* Release any previously allocated tables before rebuilding */
1885 sfree_aligned(ic->tabq_coul_FDV0);
1886 sfree_aligned(ic->tabq_coul_F);
1887 sfree_aligned(ic->tabq_coul_V);
1889 sfree_aligned(ic->tabq_vdw_FDV0);
1890 sfree_aligned(ic->tabq_vdw_F);
1891 sfree_aligned(ic->tabq_vdw_V);
1893 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1895 /* Create the original table data in FDV0 */
1896 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1897 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1898 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1899 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1900 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
/* Same layout for the LJ-PME dispersion correction tables */
1903 if (EVDW_PME(ic->vdwtype))
1905 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1906 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1907 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1908 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1909 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
/* Build the interaction-constant lookup tables (currently only the
 * Ewald correction tables) when an Ewald-type method is in use, and
 * log the resulting spacing/size. */
1913 void init_interaction_const_tables(FILE *fp,
1914 interaction_const_t *ic,
1915 gmx_bool bUsesSimpleTables,
1920 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1922 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
/* fp NULL-check (guard not visible in this excerpt) */
1926 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1927 1/ic->tabq_scale, ic->tabq_size);
/* Reset the force-switch shift constants to their neutral values.
 * NOTE(review): the body falls entirely on lines not visible in this
 * excerpt — presumably it zeroes sc->c2, sc->c3 and sc->cpot. */
1932 static void clear_force_switch_constants(shift_consts_t *sc)
/* Compute the force-switch coefficients for an r^-p interaction so the
 * force goes smoothly to zero between rsw and the cut-off rc.
 * NOTE(review): the remaining parameters fall on lines not visible in
 * this excerpt; the call sites below (force_switch_constants(6.0,
 * ic->rvdw_switch, ic->rvdw, &ic->dispersion_shift)) indicate the full
 * signature is (real p, real rsw, real rc, shift_consts_t *sc). */
1939 static void force_switch_constants(real p,
1943 /* Here we determine the coefficient for shifting the force to zero
1944 * between distance rsw and the cut-off rc.
1945 * For a potential of r^-p, we have force p*r^-(p+1).
1946 * But to save flops we absorb p in the coefficient.
1948 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1949 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1951 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1952 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
/* cpot shifts the potential so it is zero at the cut-off */
1953 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1956 static void potential_switch_constants(real rsw, real rc,
1957 switch_consts_t *sc)
1959 /* The switch function is 1 at rsw and 0 at rc.
1960 * The derivative and second derivate are zero at both ends.
1961 * rsw = max(r - r_switch, 0)
1962 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1963 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1964 * force = force*dsw - potential*sw
1967 sc->c3 = -10*pow(rc - rsw, -3);
1968 sc->c4 = 15*pow(rc - rsw, -4);
1969 sc->c5 = -6*pow(rc - rsw, -5);
/* Allocate and fill the interaction_const_t struct from the force
 * record: VdW and electrostatics parameters, potential-modifier shift
 * constants, reaction-field constants, and finally the Ewald tables.
 * With GPU runs the constants are also uploaded to the device.
 * NOTE(review): line-sampled excerpt; the return type line, braces,
 * several declarations (crc2, rtab) and some guards are not visible. */
1973 init_interaction_const(FILE *fp,
1974 const t_commrec gmx_unused *cr,
1975 interaction_const_t **interaction_const,
1976 const t_forcerec *fr,
1979 interaction_const_t *ic;
1980 gmx_bool bUsesSimpleTables = TRUE;
1984 /* Just allocate something so we can free it */
1985 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1986 snew_aligned(ic->tabq_coul_F, 16, 32);
1987 snew_aligned(ic->tabq_coul_V, 16, 32);
1989 ic->rlist = fr->rlist;
1990 ic->rlistlong = fr->rlistlong;
/* Lennard-Jones parameters */
1993 ic->vdwtype = fr->vdwtype;
1994 ic->vdw_modifier = fr->vdw_modifier;
1995 ic->rvdw = fr->rvdw;
1996 ic->rvdw_switch = fr->rvdw_switch;
1997 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1998 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1999 ic->sh_lj_ewald = 0;
2000 clear_force_switch_constants(&ic->dispersion_shift);
2001 clear_force_switch_constants(&ic->repulsion_shift);
2003 switch (ic->vdw_modifier)
2005 case eintmodPOTSHIFT:
2006 /* Only shift the potential, don't touch the force */
2007 ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2008 ic->repulsion_shift.cpot = -pow(ic->rvdw, -12.0);
2009 if (EVDW_PME(ic->vdwtype))
/* LJ-PME self-shift term evaluated at the cut-off distance */
2013 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2014 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2017 case eintmodFORCESWITCH:
2018 /* Switch the force, switch and shift the potential */
2019 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2020 &ic->dispersion_shift);
2021 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2022 &ic->repulsion_shift);
2024 case eintmodPOTSWITCH:
2025 /* Switch the potential and force */
2026 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2030 case eintmodEXACTCUTOFF:
2031 /* Nothing to do here */
/* default case (label not visible): unknown modifier is fatal */
2034 gmx_incons("unimplemented potential modifier");
2037 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2039 /* Electrostatics */
2040 ic->eeltype = fr->eeltype;
2041 ic->coulomb_modifier = fr->coulomb_modifier;
2042 ic->rcoulomb = fr->rcoulomb;
2043 ic->epsilon_r = fr->epsilon_r;
2044 ic->epsfac = fr->epsfac;
2045 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2047 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2049 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2056 /* Reaction-field */
2057 if (EEL_RF(ic->eeltype))
2059 ic->epsilon_rf = fr->epsilon_rf;
2060 ic->k_rf = fr->k_rf;
2061 ic->c_rf = fr->c_rf;
2065 /* For plain cut-off we might use the reaction-field kernels */
2066 ic->epsilon_rf = ic->epsilon_r;
2068 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2070 ic->c_rf = 1/ic->rcoulomb;
/* Log the applied potential shifts (fp NULL-check not visible) */
2080 real dispersion_shift;
2082 dispersion_shift = ic->dispersion_shift.cpot;
2083 if (EVDW_PME(ic->vdwtype))
2085 dispersion_shift -= ic->sh_lj_ewald;
2087 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2088 ic->repulsion_shift.cpot, dispersion_shift);
2090 if (ic->eeltype == eelCUT)
2092 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2094 else if (EEL_PME(ic->eeltype))
2096 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2101 *interaction_const = ic;
2103 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2105 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2107 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2108 * also sharing texture references. To keep the code simple, we don't
2109 * treat texture references as shared resources, but this means that
2110 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2111 * Hence, to ensure that the non-bonded kernels don't start before all
2112 * texture binding operations are finished, we need to wait for all ranks
2113 * to arrive here before continuing.
2115 * Note that we could omit this barrier if GPUs are not shared (or
2116 * texture objects are used), but as this is initialization code, there
2117 * is no point in complicating things.
2119 #ifdef GMX_THREAD_MPI
2124 #endif /* GMX_THREAD_MPI */
2127 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2128 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
/* Create and initialize the nonbonded_verlet_t data structure: select
 * GPU vs CPU resources, pick kernels for the local (and, with domain
 * decomposition, non-local) interaction groups, set the pair-list
 * balancing parameter, and initialize pair search, pair lists and
 * atom data for each group.
 * NOTE(review): line-sampled excerpt; braces, some declarations (i,
 * env, end), several call arguments and else-branches are not visible. */
2131 static void init_nb_verlet(FILE *fp,
2132 nonbonded_verlet_t **nb_verlet,
2133 gmx_bool bFEP_NonBonded,
2134 const t_inputrec *ir,
2135 const t_forcerec *fr,
2136 const t_commrec *cr,
2137 const char *nbpu_opt)
2139 nonbonded_verlet_t *nbv;
2142 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2144 nbnxn_alloc_t *nb_alloc;
2145 nbnxn_free_t *nb_free;
2149 pick_nbnxn_resources(cr, fr->hwinfo,
/* Two interaction groups (local + non-local) with domain decomposition */
2157 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2158 for (i = 0; i < nbv->ngrp; i++)
2160 nbv->grp[i].nbl_lists.nnbl = 0;
2161 nbv->grp[i].nbat = NULL;
2162 nbv->grp[i].kernel_type = nbnxnkNotSet;
2164 if (i == 0) /* local */
2166 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2167 nbv->bUseGPU, bEmulateGPU, ir,
2168 &nbv->grp[i].kernel_type,
2169 &nbv->grp[i].ewald_excl,
2172 else /* non-local */
2174 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2176 /* Use GPU for local, select a CPU kernel for non-local */
2177 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2179 &nbv->grp[i].kernel_type,
2180 &nbv->grp[i].ewald_excl,
2183 bHybridGPURun = TRUE;
2187 /* Use the same kernel for local and non-local interactions */
2188 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2189 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
/* GPU branch (guard not visible in this excerpt) */
2196 /* init the NxN GPU data; the last argument tells whether we'll have
2197 * both local and non-local NB calculation on GPU */
2198 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2199 &fr->hwinfo->gpu_info, fr->gpu_opt,
2200 cr->rank_pp_intranode,
2201 (nbv->ngrp > 1) && !bHybridGPURun);
/* Pair-list balancing: explicit override via GMX_NB_MIN_CI, otherwise
 * auto-tuned to the GPU, otherwise disabled (0). */
2203 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2207 nbv->min_ci_balanced = strtol(env, &end, 10);
2208 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2210 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2215 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2216 nbv->min_ci_balanced);
2221 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2224 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2225 nbv->min_ci_balanced);
2231 nbv->min_ci_balanced = 0;
2236 nbnxn_init_search(&nbv->nbs,
2237 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2238 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2240 gmx_omp_nthreads_get(emntPairsearch));
2242 for (i = 0; i < nbv->ngrp; i++)
/* CUDA kernels require page-locked (pinned) host memory */
2244 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2246 nb_alloc = &pmalloc;
2255 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2256 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2257 /* 8x8x8 "non-simple" lists are ATM always combined */
2258 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
/* Only build separate atom data when the kernel type differs from the
 * local group's (condition head not visible). */
2262 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2264 gmx_bool bSimpleList;
2265 int enbnxninitcombrule;
2267 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2269 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2271 /* Plain LJ cut-off: we can optimize with combination rules */
2272 enbnxninitcombrule = enbnxninitcombruleDETECT;
2274 else if (fr->vdwtype == evdwPME)
2276 /* LJ-PME: we need to use a combination rule for the grid */
2277 if (fr->ljpme_combination_rule == eljpmeGEOM)
2279 enbnxninitcombrule = enbnxninitcombruleGEOM;
2283 enbnxninitcombrule = enbnxninitcombruleLB;
2288 /* We use a full combination matrix: no rule required */
2289 enbnxninitcombrule = enbnxninitcombruleNONE;
2293 snew(nbv->grp[i].nbat, 1);
2294 nbnxn_atomdata_init(fp,
2296 nbv->grp[i].kernel_type,
2298 fr->ntype, fr->nbfp,
2300 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
/* else (not visible): share the local group's atom data */
2305 nbv->grp[i].nbat = nbv->grp[0].nbat;
2310 void init_forcerec(FILE *fp,
2311 const output_env_t oenv,
2314 const t_inputrec *ir,
2315 const gmx_mtop_t *mtop,
2316 const t_commrec *cr,
2322 const char *nbpu_opt,
2323 gmx_bool bNoSolvOpt,
2326 int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2331 gmx_bool bGenericKernelOnly;
2332 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2333 gmx_bool bFEP_NonBonded;
2335 int *nm_ind, egp_flags;
2337 if (fr->hwinfo == NULL)
2339 /* Detect hardware, gather information.
2340 * In mdrun, hwinfo has already been set before calling init_forcerec.
2341 * Here we ignore GPUs, as tools will not use them anyhow.
2343 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2346 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2347 fr->use_simd_kernels = TRUE;
2349 fr->bDomDec = DOMAINDECOMP(cr);
2351 natoms = mtop->natoms;
2353 if (check_box(ir->ePBC, box))
2355 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2358 /* Test particle insertion ? */
2361 /* Set to the size of the molecule to be inserted (the last one) */
2362 /* Because of old style topologies, we have to use the last cg
2363 * instead of the last molecule type.
2365 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2366 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2367 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2369 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2377 /* Copy AdResS parameters */
2380 fr->adress_type = ir->adress->type;
2381 fr->adress_const_wf = ir->adress->const_wf;
2382 fr->adress_ex_width = ir->adress->ex_width;
2383 fr->adress_hy_width = ir->adress->hy_width;
2384 fr->adress_icor = ir->adress->icor;
2385 fr->adress_site = ir->adress->site;
2386 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2387 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2390 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2391 for (i = 0; i < ir->adress->n_energy_grps; i++)
2393 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2396 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2397 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2398 for (i = 0; i < fr->n_adress_tf_grps; i++)
2400 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2402 copy_rvec(ir->adress->refs, fr->adress_refs);
2406 fr->adress_type = eAdressOff;
2407 fr->adress_do_hybridpairs = FALSE;
2410 /* Copy the user determined parameters */
2411 fr->userint1 = ir->userint1;
2412 fr->userint2 = ir->userint2;
2413 fr->userint3 = ir->userint3;
2414 fr->userint4 = ir->userint4;
2415 fr->userreal1 = ir->userreal1;
2416 fr->userreal2 = ir->userreal2;
2417 fr->userreal3 = ir->userreal3;
2418 fr->userreal4 = ir->userreal4;
2421 fr->fc_stepsize = ir->fc_stepsize;
2424 fr->efep = ir->efep;
2425 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2426 if (ir->fepvals->bScCoul)
2428 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2429 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2433 fr->sc_alphacoul = 0;
2434 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2436 fr->sc_power = ir->fepvals->sc_power;
2437 fr->sc_r_power = ir->fepvals->sc_r_power;
2438 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2440 env = getenv("GMX_SCSIGMA_MIN");
2444 sscanf(env, "%lf", &dbl);
2445 fr->sc_sigma6_min = pow(dbl, 6);
2448 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2452 fr->bNonbonded = TRUE;
2453 if (getenv("GMX_NO_NONBONDED") != NULL)
2455 /* turn off non-bonded calculations */
2456 fr->bNonbonded = FALSE;
2457 md_print_warn(cr, fp,
2458 "Found environment variable GMX_NO_NONBONDED.\n"
2459 "Disabling nonbonded calculations.\n");
2462 bGenericKernelOnly = FALSE;
2464 /* We now check in the NS code whether a particular combination of interactions
2465 * can be used with water optimization, and disable it if that is not the case.
2468 if (getenv("GMX_NB_GENERIC") != NULL)
2473 "Found environment variable GMX_NB_GENERIC.\n"
2474 "Disabling all interaction-specific nonbonded kernels, will only\n"
2475 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2477 bGenericKernelOnly = TRUE;
2480 if (bGenericKernelOnly == TRUE)
2485 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2487 fr->use_simd_kernels = FALSE;
2491 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2492 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2496 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2498 /* Check if we can/should do all-vs-all kernels */
2499 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2500 fr->AllvsAll_work = NULL;
2501 fr->AllvsAll_workgb = NULL;
2503 /* All-vs-all kernels have not been implemented in 4.6, and
2504 * the SIMD group kernels are also buggy in this case. Non-SIMD
2505 * group kernels are OK. See Redmine #1249. */
2508 fr->bAllvsAll = FALSE;
2509 fr->use_simd_kernels = FALSE;
2513 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2514 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2515 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2516 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2517 "we are proceeding by disabling all CPU architecture-specific\n"
2518 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2519 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2523 /* Neighbour searching stuff */
2524 fr->cutoff_scheme = ir->cutoff_scheme;
2525 fr->bGrid = (ir->ns_type == ensGRID);
2526 fr->ePBC = ir->ePBC;
2528 if (fr->cutoff_scheme == ecutsGROUP)
2530 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2531 "removed in a future release when 'verlet' supports all interaction forms.\n";
2535 fprintf(stderr, "\n%s\n", note);
2539 fprintf(fp, "\n%s\n", note);
2543 /* Determine if we will do PBC for distances in bonded interactions */
2544 if (fr->ePBC == epbcNONE)
2546 fr->bMolPBC = FALSE;
2550 if (!DOMAINDECOMP(cr))
2552 /* The group cut-off scheme and SHAKE assume charge groups
2553 * are whole, but not using molpbc is faster in most cases.
2555 if (fr->cutoff_scheme == ecutsGROUP ||
2556 (ir->eConstrAlg == econtSHAKE &&
2557 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2558 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2560 fr->bMolPBC = ir->bPeriodicMols;
2565 if (getenv("GMX_USE_GRAPH") != NULL)
2567 fr->bMolPBC = FALSE;
2570 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2577 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2580 fr->bGB = (ir->implicit_solvent == eisGBSA);
2582 fr->rc_scaling = ir->refcoord_scaling;
2583 copy_rvec(ir->posres_com, fr->posres_com);
2584 copy_rvec(ir->posres_comB, fr->posres_comB);
2585 fr->rlist = cutoff_inf(ir->rlist);
2586 fr->rlistlong = cutoff_inf(ir->rlistlong);
2587 fr->eeltype = ir->coulombtype;
2588 fr->vdwtype = ir->vdwtype;
2589 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2591 fr->coulomb_modifier = ir->coulomb_modifier;
2592 fr->vdw_modifier = ir->vdw_modifier;
2594 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2595 switch (fr->eeltype)
2598 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2604 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2608 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2609 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2618 case eelPMEUSERSWITCH:
2619 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2624 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2628 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2632 /* Vdw: Translate from mdp settings to kernel format */
2633 switch (fr->vdwtype)
2638 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2642 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2646 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2652 case evdwENCADSHIFT:
2653 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2657 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2661 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2662 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2663 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2665 fr->rvdw = cutoff_inf(ir->rvdw);
2666 fr->rvdw_switch = ir->rvdw_switch;
2667 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2668 fr->rcoulomb_switch = ir->rcoulomb_switch;
2670 fr->bTwinRange = fr->rlistlong > fr->rlist;
2671 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2673 fr->reppow = mtop->ffparams.reppow;
2675 if (ir->cutoff_scheme == ecutsGROUP)
2677 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2678 && !EVDW_PME(fr->vdwtype));
2679 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2680 fr->bcoultab = !(fr->eeltype == eelCUT ||
2681 fr->eeltype == eelEWALD ||
2682 fr->eeltype == eelPME ||
2683 fr->eeltype == eelRF ||
2684 fr->eeltype == eelRF_ZERO);
2686 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2687 * going to be faster to tabulate the interaction than calling the generic kernel.
2688 * However, if generic kernels have been requested we keep things analytically.
2690 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2691 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2692 bGenericKernelOnly == FALSE)
2694 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2696 fr->bcoultab = TRUE;
2697 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2698 * which would otherwise need two tables.
2702 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2703 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2704 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2705 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2707 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2709 fr->bcoultab = TRUE;
2713 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2715 fr->bcoultab = TRUE;
2717 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2722 if (getenv("GMX_REQUIRE_TABLES"))
2725 fr->bcoultab = TRUE;
2730 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2731 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2734 if (fr->bvdwtab == TRUE)
2736 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2737 fr->nbkernel_vdw_modifier = eintmodNONE;
2739 if (fr->bcoultab == TRUE)
2741 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2742 fr->nbkernel_elec_modifier = eintmodNONE;
2746 if (ir->cutoff_scheme == ecutsVERLET)
2748 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2750 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2752 fr->bvdwtab = FALSE;
2753 fr->bcoultab = FALSE;
2756 /* Tables are used for direct ewald sum */
2759 if (EEL_PME(ir->coulombtype))
2763 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2765 if (ir->coulombtype == eelP3M_AD)
2767 please_cite(fp, "Hockney1988");
2768 please_cite(fp, "Ballenegger2012");
2772 please_cite(fp, "Essmann95a");
2775 if (ir->ewald_geometry == eewg3DC)
2779 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2781 please_cite(fp, "In-Chul99a");
2784 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2785 init_ewald_tab(&(fr->ewald_table), ir, fp);
2788 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2789 1/fr->ewaldcoeff_q);
2793 if (EVDW_PME(ir->vdwtype))
2797 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2799 please_cite(fp, "Essmann95a");
2800 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2803 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2804 1/fr->ewaldcoeff_lj);
2808 /* Electrostatics */
2809 fr->epsilon_r = ir->epsilon_r;
2810 fr->epsilon_rf = ir->epsilon_rf;
2811 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2813 /* Parameters for generalized RF */
2817 if (fr->eeltype == eelGRF)
2819 init_generalized_rf(fp, mtop, ir, fr);
2822 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2823 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2824 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2825 IR_ELEC_FIELD(*ir) ||
2826 (fr->adress_icor != eAdressICOff)
2829 if (fr->cutoff_scheme == ecutsGROUP &&
2830 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2832 /* Count the total number of charge groups */
2833 fr->cg_nalloc = ncg_mtop(mtop);
2834 srenew(fr->cg_cm, fr->cg_nalloc);
2836 if (fr->shift_vec == NULL)
2838 snew(fr->shift_vec, SHIFTS);
2841 if (fr->fshift == NULL)
2843 snew(fr->fshift, SHIFTS);
2846 if (fr->nbfp == NULL)
2848 fr->ntype = mtop->ffparams.atnr;
2849 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2850 if (EVDW_PME(fr->vdwtype))
2852 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2856 /* Copy the energy group exclusions */
2857 fr->egp_flags = ir->opts.egp_flags;
2859 /* Van der Waals stuff */
2860 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2862 if (fr->rvdw_switch >= fr->rvdw)
2864 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2865 fr->rvdw_switch, fr->rvdw);
2869 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2870 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2871 fr->rvdw_switch, fr->rvdw);
2875 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2877 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2880 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2882 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2885 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2887 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2892 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2893 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2896 fr->eDispCorr = ir->eDispCorr;
2897 if (ir->eDispCorr != edispcNO)
2899 set_avcsixtwelve(fp, fr, mtop);
2904 set_bham_b_max(fp, fr, mtop);
2907 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2909 /* Copy the GBSA data (radius, volume and surftens for each
2910 * atomtype) from the topology atomtype section to forcerec.
2912 snew(fr->atype_radius, fr->ntype);
2913 snew(fr->atype_vol, fr->ntype);
2914 snew(fr->atype_surftens, fr->ntype);
2915 snew(fr->atype_gb_radius, fr->ntype);
2916 snew(fr->atype_S_hct, fr->ntype);
2918 if (mtop->atomtypes.nr > 0)
2920 for (i = 0; i < fr->ntype; i++)
2922 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2924 for (i = 0; i < fr->ntype; i++)
2926 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2928 for (i = 0; i < fr->ntype; i++)
2930 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2932 for (i = 0; i < fr->ntype; i++)
2934 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2936 for (i = 0; i < fr->ntype; i++)
2938 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2942 /* Generate the GB table if needed */
2946 fr->gbtabscale = 2000;
2948 fr->gbtabscale = 500;
2952 fr->gbtab = make_gb_table(oenv, fr);
2954 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2956 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2957 if (!DOMAINDECOMP(cr))
2959 make_local_gb(cr, fr->born, ir->gb_algorithm);
2963 /* Set the charge scaling */
2964 if (fr->epsilon_r != 0)
2966 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2970 /* eps = 0 is infinite dieletric: no coulomb interactions */
2974 /* Reaction field constants */
2975 if (EEL_RF(fr->eeltype))
2977 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2978 fr->rcoulomb, fr->temp, fr->zsquare, box,
2979 &fr->kappa, &fr->k_rf, &fr->c_rf);
2982 /*This now calculates sum for q and c6*/
2983 set_chargesum(fp, fr, mtop);
2985 /* if we are using LR electrostatics, and they are tabulated,
2986 * the tables will contain modified coulomb interactions.
2987 * Since we want to use the non-shifted ones for 1-4
2988 * coulombic interactions, we must have an extra set of tables.
2991 /* Construct tables.
2992 * A little unnecessary to make both vdw and coul tables sometimes,
2993 * but what the heck... */
2995 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2996 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2998 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2999 fr->coulomb_modifier != eintmodNONE ||
3000 fr->vdw_modifier != eintmodNONE ||
3001 fr->bBHAM || fr->bEwald) &&
3002 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3003 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3004 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3006 negp_pp = ir->opts.ngener - ir->nwall;
3010 bSomeNormalNbListsAreInUse = TRUE;
3015 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3016 for (egi = 0; egi < negp_pp; egi++)
3018 for (egj = egi; egj < negp_pp; egj++)
3020 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3021 if (!(egp_flags & EGP_EXCL))
3023 if (egp_flags & EGP_TABLE)
3029 bSomeNormalNbListsAreInUse = TRUE;
3034 if (bSomeNormalNbListsAreInUse)
3036 fr->nnblists = negptable + 1;
3040 fr->nnblists = negptable;
3042 if (fr->nnblists > 1)
3044 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3053 snew(fr->nblists, fr->nnblists);
3055 /* This code automatically gives table length tabext without cut-off's,
3056 * in that case grompp should already have checked that we do not need
3057 * normal tables and we only generate tables for 1-4 interactions.
3059 rtab = ir->rlistlong + ir->tabext;
3063 /* make tables for ordinary interactions */
3064 if (bSomeNormalNbListsAreInUse)
3066 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3069 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3071 if (!bMakeSeparate14Table)
3073 fr->tab14 = fr->nblists[0].table_elec_vdw;
3083 /* Read the special tables for certain energy group pairs */
3084 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3085 for (egi = 0; egi < negp_pp; egi++)
3087 for (egj = egi; egj < negp_pp; egj++)
3089 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3090 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3092 nbl = &(fr->nblists[m]);
3093 if (fr->nnblists > 1)
3095 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3097 /* Read the table file with the two energy groups names appended */
3098 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3099 *mtop->groups.grpname[nm_ind[egi]],
3100 *mtop->groups.grpname[nm_ind[egj]],
3104 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3105 *mtop->groups.grpname[nm_ind[egi]],
3106 *mtop->groups.grpname[nm_ind[egj]],
3107 &fr->nblists[fr->nnblists/2+m]);
3111 else if (fr->nnblists > 1)
3113 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3119 else if ((fr->eDispCorr != edispcNO) &&
3120 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3121 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3122 (fr->vdw_modifier == eintmodPOTSHIFT)))
3124 /* Tables might not be used for the potential modifier interactions per se, but
3125 * we still need them to evaluate switch/shift dispersion corrections in this case.
3127 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3130 if (bMakeSeparate14Table)
3132 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3133 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3134 GMX_MAKETABLES_14ONLY);
3137 /* Read AdResS Thermo Force table if needed */
3138 if (fr->adress_icor == eAdressICThermoForce)
3140 /* old todo replace */
3142 if (ir->adress->n_tf_grps > 0)
3144 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3149 /* load the default table */
3150 snew(fr->atf_tabs, 1);
3151 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3156 fr->nwall = ir->nwall;
3157 if (ir->nwall && ir->wall_type == ewtTABLE)
3159 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3164 fcd->bondtab = make_bonded_tables(fp,
3165 F_TABBONDS, F_TABBONDSNC,
3167 fcd->angletab = make_bonded_tables(fp,
3170 fcd->dihtab = make_bonded_tables(fp,
3178 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3182 /* QM/MM initialization if requested
3186 fprintf(stderr, "QM/MM calculation requested.\n");
3189 fr->bQMMM = ir->bQMMM;
3190 fr->qr = mk_QMMMrec();
3192 /* Set all the static charge group info */
3193 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3195 &fr->bExcl_IntraCGAll_InterCGNone);
3196 if (DOMAINDECOMP(cr))
3202 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3205 if (!DOMAINDECOMP(cr))
3207 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3208 mtop->natoms, mtop->natoms, mtop->natoms);
3211 fr->print_force = print_force;
3214 /* coarse load balancing vars */
3219 /* Initialize neighbor search */
3220 init_ns(fp, cr, &fr->ns, fr, mtop);
3222 if (cr->duty & DUTY_PP)
3224 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3228 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3233 /* Initialize the thread working data for bonded interactions */
3234 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3236 snew(fr->excl_load, fr->nthreads+1);
3238 if (fr->cutoff_scheme == ecutsVERLET)
3240 if (ir->rcoulomb != ir->rvdw)
3242 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3245 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3248 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3249 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3251 if (ir->eDispCorr != edispcNO)
3253 calc_enervirdiff(fp, ir->eDispCorr, fr);
/* Debug-print helpers for pr_forcerec(): write one "name: value" line to fp,
 * using the stringized argument expression as the label.
 * All macro parameters are now parenthesized (the original pr_real left both
 * fp and r bare, inconsistently with pr_int/pr_bool) so that expansion-site
 * expressions with operators cannot change precedence. */
#define pr_real(fp, r) fprintf((fp), "%s: %e\n", #r, (r))
#define pr_int(fp, i) fprintf((fp), "%s: %d\n", #i, (i))
#define pr_bool(fp, b) fprintf((fp), "%s: %s\n", #b, bool_names[(b)])
/* pr_forcerec() - dump a subset of the t_forcerec fields (cut-off radii,
 * flags, per-list table sizes) to fp in "name: value" form for debugging,
 * via the pr_real/pr_int/pr_bool macros above.
 * NOTE(review): this view of the file is missing interleaved lines
 * (braces, the declaration of the loop index i), so the code below is
 * left byte-identical. */
3261 void pr_forcerec(FILE *fp, t_forcerec *fr)
3265     pr_real(fp, fr->rlist);
3266     pr_real(fp, fr->rcoulomb);
3267     pr_real(fp, fr->fudgeQQ);
3268     pr_bool(fp, fr->bGrid);
3269     pr_bool(fp, fr->bTwinRange);
3270     /*pr_int(fp,fr->cg0);
3271        pr_int(fp,fr->hcg);*/
/* One cubic-spline table size per non-bonded list */
3272     for (i = 0; i < fr->nnblists; i++)
3274         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
/* NOTE(review): fr->rcoulomb is printed twice (here and above); the second
 * occurrence was perhaps meant to be fr->rvdw - confirm against upstream. */
3276     pr_real(fp, fr->rcoulomb_switch);
3277     pr_real(fp, fr->rcoulomb);
/* forcerec_set_excl_load() - statically partition the exclusion lists of
 * top->excls over fr->nthreads threads so that each thread handles roughly
 * the same number of excluded atom pairs. The boundaries are written to
 * fr->excl_load: excl_load[t] is the first exclusion-list index owned by
 * thread t, with excl_load[0] = 0 (fr->excl_load is allocated with
 * nthreads+1 entries elsewhere in this file).
 * NOTE(review): interleaved lines (the declaration of ind, the pair-count
 * accumulation statements, braces) are missing from this view; the code is
 * left byte-identical. */
3282 void forcerec_set_excl_load(t_forcerec *fr,
3283                             const gmx_localtop_t *top)
3286     int t, i, j, ntot, n, ntarget;
3288     ind = top->excls.index;
/* First pass: count the total number of excluded pairs into ntot
 * (accumulation statements not visible in this view). */
3292     for (i = 0; i < top->excls.nr; i++)
3294         for (j = ind[i]; j < ind[i+1]; j++)
/* Second pass: advance i until each thread's proportional share of ntot
 * is reached, recording the boundary index per thread. */
3303     fr->excl_load[0] = 0;
3306     for (t = 1; t <= fr->nthreads; t++)
/* Integer arithmetic deliberately distributes the remainder evenly */
3308         ntarget = (ntot*t)/fr->nthreads;
3309         while (i < top->excls.nr && n < ntarget)
3311             for (j = ind[i]; j < ind[i+1]; j++)
3320         fr->excl_load[t] = i;
3324 /* Frees GPU memory and destroys the CUDA context.
3326 * Note that this function needs to be called even if GPUs are not used
3327 * in this run because the PME ranks have no knowledge of whether GPUs
3328 * are used or not, but all ranks need to enter the barrier below.
3330 void free_gpu_resources(const t_forcerec *fr,
3331 const t_commrec *cr)
3333 gmx_bool bIsPPrankUsingGPU;
3334 char gpu_err_str[STRLEN];
3336 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3338 if (bIsPPrankUsingGPU)
3340 /* free nbnxn data in GPU memory */
3341 nbnxn_cuda_free(fr->nbv->cu_nbv);
3343 /* With tMPI we need to wait for all ranks to finish deallocation before
3344 * destroying the context in free_gpu() as some ranks may be sharing
3346 * Note: as only PP ranks need to free GPU resources, so it is safe to
3347 * not call the barrier on PME ranks.
3349 #ifdef GMX_THREAD_MPI
3354 #endif /* GMX_THREAD_MPI */
3356 /* uninitialize GPU (by destroying the context) */
3357 if (!free_gpu(gpu_err_str))
3359 gmx_warning("On rank %d failed to free GPU #%d: %s",
3360 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);