2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/domdec/domdec.h"
46 #include "gromacs/ewald/ewald.h"
47 #include "gromacs/legacyheaders/copyrite.h"
48 #include "gromacs/legacyheaders/force.h"
49 #include "gromacs/legacyheaders/gmx_detect_hardware.h"
50 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
51 #include "gromacs/legacyheaders/gpu_utils.h"
52 #include "gromacs/legacyheaders/inputrec.h"
53 #include "gromacs/legacyheaders/macros.h"
54 #include "gromacs/legacyheaders/md_logging.h"
55 #include "gromacs/legacyheaders/md_support.h"
56 #include "gromacs/legacyheaders/names.h"
57 #include "gromacs/legacyheaders/network.h"
58 #include "gromacs/legacyheaders/nonbonded.h"
59 #include "gromacs/legacyheaders/ns.h"
60 #include "gromacs/legacyheaders/pmalloc_cuda.h"
61 #include "gromacs/legacyheaders/qmmm.h"
62 #include "gromacs/legacyheaders/tables.h"
63 #include "gromacs/legacyheaders/txtdump.h"
64 #include "gromacs/legacyheaders/typedefs.h"
65 #include "gromacs/legacyheaders/types/commrec.h"
66 #include "gromacs/legacyheaders/types/nbnxn_cuda_types_ext.h"
67 #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
68 #include "gromacs/math/units.h"
69 #include "gromacs/math/utilities.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/mdlib/forcerec-threading.h"
72 #include "gromacs/mdlib/nb_verlet.h"
73 #include "gromacs/mdlib/nbnxn_atomdata.h"
74 #include "gromacs/mdlib/nbnxn_consts.h"
75 #include "gromacs/mdlib/nbnxn_search.h"
76 #include "gromacs/mdlib/nbnxn_simd.h"
77 #include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
78 #include "gromacs/pbcutil/ishift.h"
79 #include "gromacs/pbcutil/pbc.h"
80 #include "gromacs/topology/mtop_util.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/smalloc.h"
84 t_forcerec *mk_forcerec(void)
94 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
98 for (i = 0; (i < atnr); i++)
100 for (j = 0; (j < atnr); j++)
102 fprintf(fp, "%2d - %2d", i, j);
105 fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
106 BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
110 fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
111 C12(nbfp, atnr, i, j)/12.0);
118 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
126 snew(nbfp, 3*atnr*atnr);
127 for (i = k = 0; (i < atnr); i++)
129 for (j = 0; (j < atnr); j++, k++)
131 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
132 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
133 /* nbfp now includes the 6.0 derivative prefactor */
134 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
140 snew(nbfp, 2*atnr*atnr);
141 for (i = k = 0; (i < atnr); i++)
143 for (j = 0; (j < atnr); j++, k++)
145 /* nbfp now includes the 6.0/12.0 derivative prefactors */
146 C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0;
147 C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0;
155 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
158 real c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
160 const real oneOverSix = 1.0 / 6.0;
162 /* For LJ-PME simulations, we correct the energies with the reciprocal space
163 * inside of the cut-off. To do this the non-bonded kernels needs to have
164 * access to the C6-values used on the reciprocal grid in pme.c
168 snew(grid, 2*atnr*atnr);
169 for (i = k = 0; (i < atnr); i++)
171 for (j = 0; (j < atnr); j++, k++)
173 c6i = idef->iparams[i*(atnr+1)].lj.c6;
174 c12i = idef->iparams[i*(atnr+1)].lj.c12;
175 c6j = idef->iparams[j*(atnr+1)].lj.c6;
176 c12j = idef->iparams[j*(atnr+1)].lj.c12;
177 c6 = sqrt(c6i * c6j);
178 if (fr->ljpme_combination_rule == eljpmeLB
179 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
181 sigmai = pow(c12i / c6i, oneOverSix);
182 sigmaj = pow(c12j / c6j, oneOverSix);
183 epsi = c6i * c6i / c12i;
184 epsj = c6j * c6j / c12j;
185 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
187 /* Store the elements at the same relative positions as C6 in nbfp in order
188 * to simplify access in the kernels
190 grid[2*(atnr*i+j)] = c6*6.0;
196 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
200 real c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
202 const real oneOverSix = 1.0 / 6.0;
205 snew(nbfp, 2*atnr*atnr);
206 for (i = 0; i < atnr; ++i)
208 for (j = 0; j < atnr; ++j)
210 c6i = idef->iparams[i*(atnr+1)].lj.c6;
211 c12i = idef->iparams[i*(atnr+1)].lj.c12;
212 c6j = idef->iparams[j*(atnr+1)].lj.c6;
213 c12j = idef->iparams[j*(atnr+1)].lj.c12;
214 c6 = sqrt(c6i * c6j);
215 c12 = sqrt(c12i * c12j);
216 if (comb_rule == eCOMB_ARITHMETIC
217 && !gmx_numzero(c6) && !gmx_numzero(c12))
219 sigmai = pow(c12i / c6i, oneOverSix);
220 sigmaj = pow(c12j / c6j, oneOverSix);
221 epsi = c6i * c6i / c12i;
222 epsj = c6j * c6j / c12j;
223 c6 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
224 c12 = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
226 C6(nbfp, atnr, i, j) = c6*6.0;
227 C12(nbfp, atnr, i, j) = c12*12.0;
233 /* This routine sets fr->solvent_opt to the most common solvent in the
234 * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
235 * the fr->solvent_type array with the correct type (or esolNO).
237 * Charge groups that fulfill the conditions but are not identical to the
238 * most common one will be marked as esolNO in the solvent_type array.
240 * TIP3p is identical to SPC for these purposes, so we call it
241 * SPC in the arrays (Apologies to Bill Jorgensen ;-)
243 * NOTE: QM particle should not
244 * become an optimized solvent. Not even if there is only one charge
254 } solvent_parameters_t;
257 check_solvent_cg(const gmx_moltype_t *molt,
260 const unsigned char *qm_grpnr,
261 const t_grps *qm_grps,
263 int *n_solvent_parameters,
264 solvent_parameters_t **solvent_parameters_p,
274 real tmp_charge[4] = { 0.0 }; /* init to zero to make gcc4.8 happy */
275 int tmp_vdwtype[4] = { 0 }; /* init to zero to make gcc4.8 happy */
278 solvent_parameters_t *solvent_parameters;
280 /* We use a list with parameters for each solvent type.
281 * Every time we discover a new molecule that fulfills the basic
282 * conditions for a solvent we compare with the previous entries
283 * in these lists. If the parameters are the same we just increment
284 * the counter for that type, and otherwise we create a new type
285 * based on the current molecule.
287 * Once we've finished going through all molecules we check which
288 * solvent is most common, and mark all those molecules while we
289 * clear the flag on all others.
292 solvent_parameters = *solvent_parameters_p;
294 /* Mark the cg first as non optimized */
297 /* Check if this cg has no exclusions with atoms in other charge groups
298 * and all atoms inside the charge group excluded.
299 * We only have 3 or 4 atom solvent loops.
301 if (GET_CGINFO_EXCL_INTER(cginfo) ||
302 !GET_CGINFO_EXCL_INTRA(cginfo))
307 /* Get the indices of the first atom in this charge group */
308 j0 = molt->cgs.index[cg0];
309 j1 = molt->cgs.index[cg0+1];
311 /* Number of atoms in our molecule */
317 "Moltype '%s': there are %d atoms in this charge group\n",
321 /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
324 if (nj < 3 || nj > 4)
329 /* Check if we are doing QM on this group */
331 if (qm_grpnr != NULL)
333 for (j = j0; j < j1 && !qm; j++)
335 qm = (qm_grpnr[j] < qm_grps->nr - 1);
338 /* Cannot use solvent optimization with QM */
344 atom = molt->atoms.atom;
346 /* Still looks like a solvent, time to check parameters */
348 /* If it is perturbed (free energy) we can't use the solvent loops,
349 * so then we just skip to the next molecule.
353 for (j = j0; j < j1 && !perturbed; j++)
355 perturbed = PERTURBED(atom[j]);
363 /* Now it's only a question if the VdW and charge parameters
364 * are OK. Before doing the check we compare and see if they are
365 * identical to a possible previous solvent type.
366 * First we assign the current types and charges.
368 for (j = 0; j < nj; j++)
370 tmp_vdwtype[j] = atom[j0+j].type;
371 tmp_charge[j] = atom[j0+j].q;
374 /* Does it match any previous solvent type? */
375 for (k = 0; k < *n_solvent_parameters; k++)
380 /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
381 if ( (solvent_parameters[k].model == esolSPC && nj != 3) ||
382 (solvent_parameters[k].model == esolTIP4P && nj != 4) )
387 /* Check that types & charges match for all atoms in molecule */
388 for (j = 0; j < nj && match == TRUE; j++)
390 if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
394 if (tmp_charge[j] != solvent_parameters[k].charge[j])
401 /* Congratulations! We have a matched solvent.
402 * Flag it with this type for later processing.
405 solvent_parameters[k].count += nmol;
407 /* We are done with this charge group */
412 /* If we get here, we have a tentative new solvent type.
413 * Before we add it we must check that it fulfills the requirements
414 * of the solvent optimized loops. First determine which atoms have
417 for (j = 0; j < nj; j++)
420 tjA = tmp_vdwtype[j];
422 /* Go through all other tpes and see if any have non-zero
423 * VdW parameters when combined with this one.
425 for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
427 /* We already checked that the atoms weren't perturbed,
428 * so we only need to check state A now.
432 has_vdw[j] = (has_vdw[j] ||
433 (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
434 (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
435 (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
440 has_vdw[j] = (has_vdw[j] ||
441 (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
442 (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
447 /* Now we know all we need to make the final check and assignment. */
451 * For this we require thatn all atoms have charge,
452 * the charges on atom 2 & 3 should be the same, and only
453 * atom 1 might have VdW.
455 if (has_vdw[1] == FALSE &&
456 has_vdw[2] == FALSE &&
457 tmp_charge[0] != 0 &&
458 tmp_charge[1] != 0 &&
459 tmp_charge[2] == tmp_charge[1])
461 srenew(solvent_parameters, *n_solvent_parameters+1);
462 solvent_parameters[*n_solvent_parameters].model = esolSPC;
463 solvent_parameters[*n_solvent_parameters].count = nmol;
464 for (k = 0; k < 3; k++)
466 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
467 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
470 *cg_sp = *n_solvent_parameters;
471 (*n_solvent_parameters)++;
476 /* Or could it be a TIP4P?
477 * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
478 * Only atom 1 mght have VdW.
480 if (has_vdw[1] == FALSE &&
481 has_vdw[2] == FALSE &&
482 has_vdw[3] == FALSE &&
483 tmp_charge[0] == 0 &&
484 tmp_charge[1] != 0 &&
485 tmp_charge[2] == tmp_charge[1] &&
488 srenew(solvent_parameters, *n_solvent_parameters+1);
489 solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
490 solvent_parameters[*n_solvent_parameters].count = nmol;
491 for (k = 0; k < 4; k++)
493 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
494 solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k];
497 *cg_sp = *n_solvent_parameters;
498 (*n_solvent_parameters)++;
502 *solvent_parameters_p = solvent_parameters;
506 check_solvent(FILE * fp,
507 const gmx_mtop_t * mtop,
509 cginfo_mb_t *cginfo_mb)
512 const gmx_moltype_t *molt;
513 int mb, mol, cg_mol, at_offset, am, cgm, i, nmol_ch, nmol;
514 int n_solvent_parameters;
515 solvent_parameters_t *solvent_parameters;
521 fprintf(debug, "Going to determine what solvent types we have.\n");
524 n_solvent_parameters = 0;
525 solvent_parameters = NULL;
526 /* Allocate temporary array for solvent type */
527 snew(cg_sp, mtop->nmolblock);
530 for (mb = 0; mb < mtop->nmolblock; mb++)
532 molt = &mtop->moltype[mtop->molblock[mb].type];
534 /* Here we have to loop over all individual molecules
535 * because we need to check for QMMM particles.
537 snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
538 nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
539 nmol = mtop->molblock[mb].nmol/nmol_ch;
540 for (mol = 0; mol < nmol_ch; mol++)
543 am = mol*cgs->index[cgs->nr];
544 for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
546 check_solvent_cg(molt, cg_mol, nmol,
547 mtop->groups.grpnr[egcQMMM] ?
548 mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
549 &mtop->groups.grps[egcQMMM],
551 &n_solvent_parameters, &solvent_parameters,
552 cginfo_mb[mb].cginfo[cgm+cg_mol],
553 &cg_sp[mb][cgm+cg_mol]);
556 at_offset += cgs->index[cgs->nr];
559 /* Puh! We finished going through all charge groups.
560 * Now find the most common solvent model.
563 /* Most common solvent this far */
565 for (i = 0; i < n_solvent_parameters; i++)
568 solvent_parameters[i].count > solvent_parameters[bestsp].count)
576 bestsol = solvent_parameters[bestsp].model;
584 for (mb = 0; mb < mtop->nmolblock; mb++)
586 cgs = &mtop->moltype[mtop->molblock[mb].type].cgs;
587 nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
588 for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
590 if (cg_sp[mb][i] == bestsp)
592 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
597 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
604 if (bestsol != esolNO && fp != NULL)
606 fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
608 solvent_parameters[bestsp].count);
611 sfree(solvent_parameters);
612 fr->solvent_opt = bestsol;
616 acNONE = 0, acCONSTRAINT, acSETTLE
619 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
620 t_forcerec *fr, gmx_bool bNoSolvOpt,
621 gmx_bool *bFEP_NonBonded,
622 gmx_bool *bExcl_IntraCGAll_InterCGNone)
625 const t_blocka *excl;
626 const gmx_moltype_t *molt;
627 const gmx_molblock_t *molb;
628 cginfo_mb_t *cginfo_mb;
631 int cg_offset, a_offset;
632 int mb, m, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
636 gmx_bool bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
638 snew(cginfo_mb, mtop->nmolblock);
640 snew(type_VDW, fr->ntype);
641 for (ai = 0; ai < fr->ntype; ai++)
643 type_VDW[ai] = FALSE;
644 for (j = 0; j < fr->ntype; j++)
646 type_VDW[ai] = type_VDW[ai] ||
648 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
649 C12(fr->nbfp, fr->ntype, ai, j) != 0;
653 *bFEP_NonBonded = FALSE;
654 *bExcl_IntraCGAll_InterCGNone = TRUE;
657 snew(bExcl, excl_nalloc);
660 for (mb = 0; mb < mtop->nmolblock; mb++)
662 molb = &mtop->molblock[mb];
663 molt = &mtop->moltype[molb->type];
667 /* Check if the cginfo is identical for all molecules in this block.
668 * If so, we only need an array of the size of one molecule.
669 * Otherwise we make an array of #mol times #cgs per molecule.
672 for (m = 0; m < molb->nmol; m++)
674 int am = m*cgs->index[cgs->nr];
675 for (cg = 0; cg < cgs->nr; cg++)
678 a1 = cgs->index[cg+1];
679 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
680 ggrpnr(&mtop->groups, egcENER, a_offset +a0))
684 if (mtop->groups.grpnr[egcQMMM] != NULL)
686 for (ai = a0; ai < a1; ai++)
688 if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
689 mtop->groups.grpnr[egcQMMM][a_offset +ai])
698 cginfo_mb[mb].cg_start = cg_offset;
699 cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr;
700 cginfo_mb[mb].cg_mod = (bId ? 1 : molb->nmol)*cgs->nr;
701 snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
702 cginfo = cginfo_mb[mb].cginfo;
704 /* Set constraints flags for constrained atoms */
705 snew(a_con, molt->atoms.nr);
706 for (ftype = 0; ftype < F_NRE; ftype++)
708 if (interaction_function[ftype].flags & IF_CONSTRAINT)
713 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
717 for (a = 0; a < nral; a++)
719 a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
720 (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
726 for (m = 0; m < (bId ? 1 : molb->nmol); m++)
729 int am = m*cgs->index[cgs->nr];
730 for (cg = 0; cg < cgs->nr; cg++)
733 a1 = cgs->index[cg+1];
735 /* Store the energy group in cginfo */
736 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
737 SET_CGINFO_GID(cginfo[cgm+cg], gid);
739 /* Check the intra/inter charge group exclusions */
740 if (a1-a0 > excl_nalloc)
742 excl_nalloc = a1 - a0;
743 srenew(bExcl, excl_nalloc);
745 /* bExclIntraAll: all intra cg interactions excluded
746 * bExclInter: any inter cg interactions excluded
748 bExclIntraAll = TRUE;
752 bHavePerturbedAtoms = FALSE;
753 for (ai = a0; ai < a1; ai++)
755 /* Check VDW and electrostatic interactions */
756 bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
757 type_VDW[molt->atoms.atom[ai].typeB]);
758 bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 ||
759 molt->atoms.atom[ai].qB != 0);
761 bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
763 /* Clear the exclusion list for atom ai */
764 for (aj = a0; aj < a1; aj++)
766 bExcl[aj-a0] = FALSE;
768 /* Loop over all the exclusions of atom ai */
769 for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
772 if (aj < a0 || aj >= a1)
781 /* Check if ai excludes a0 to a1 */
782 for (aj = a0; aj < a1; aj++)
786 bExclIntraAll = FALSE;
793 SET_CGINFO_CONSTR(cginfo[cgm+cg]);
796 SET_CGINFO_SETTLE(cginfo[cgm+cg]);
804 SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
808 SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
810 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
812 /* The size in cginfo is currently only read with DD */
813 gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
817 SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
821 SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
823 if (bHavePerturbedAtoms && fr->efep != efepNO)
825 SET_CGINFO_FEP(cginfo[cgm+cg]);
826 *bFEP_NonBonded = TRUE;
828 /* Store the charge group size */
829 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
831 if (!bExclIntraAll || bExclInter)
833 *bExcl_IntraCGAll_InterCGNone = FALSE;
840 cg_offset += molb->nmol*cgs->nr;
841 a_offset += molb->nmol*cgs->index[cgs->nr];
845 /* the solvent optimizer is called after the QM is initialized,
846 * because we don't want to have the QM subsystemto become an
850 check_solvent(fplog, mtop, fr, cginfo_mb);
852 if (getenv("GMX_NO_SOLV_OPT"))
856 fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
857 "Disabling all solvent optimization\n");
859 fr->solvent_opt = esolNO;
863 fr->solvent_opt = esolNO;
865 if (!fr->solvent_opt)
867 for (mb = 0; mb < mtop->nmolblock; mb++)
869 for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
871 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
879 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
884 ncg = cgi_mb[nmb-1].cg_end;
887 for (cg = 0; cg < ncg; cg++)
889 while (cg >= cgi_mb[mb].cg_end)
894 cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
900 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
902 /*This now calculates sum for q and c6*/
903 double qsum, q2sum, q, c6sum, c6;
905 const t_atoms *atoms;
910 for (mb = 0; mb < mtop->nmolblock; mb++)
912 nmol = mtop->molblock[mb].nmol;
913 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
914 for (i = 0; i < atoms->nr; i++)
916 q = atoms->atom[i].q;
919 c6 = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
924 fr->q2sum[0] = q2sum;
925 fr->c6sum[0] = c6sum;
927 if (fr->efep != efepNO)
932 for (mb = 0; mb < mtop->nmolblock; mb++)
934 nmol = mtop->molblock[mb].nmol;
935 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
936 for (i = 0; i < atoms->nr; i++)
938 q = atoms->atom[i].qB;
941 c6 = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
945 fr->q2sum[1] = q2sum;
946 fr->c6sum[1] = c6sum;
951 fr->qsum[1] = fr->qsum[0];
952 fr->q2sum[1] = fr->q2sum[0];
953 fr->c6sum[1] = fr->c6sum[0];
957 if (fr->efep == efepNO)
959 fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
963 fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
964 fr->qsum[0], fr->qsum[1]);
969 void update_forcerec(t_forcerec *fr, matrix box)
971 if (fr->eeltype == eelGRF)
973 calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
974 fr->rcoulomb, fr->temp, fr->zsquare, box,
975 &fr->kappa, &fr->k_rf, &fr->c_rf);
979 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
981 const t_atoms *atoms, *atoms_tpi;
982 const t_blocka *excl;
983 int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, nexcl, q;
984 gmx_int64_t npair, npair_ij, tmpi, tmpj;
985 double csix, ctwelve;
989 real *nbfp_comb = NULL;
995 /* For LJ-PME, we want to correct for the difference between the
996 * actual C6 values and the C6 values used by the LJ-PME based on
997 * combination rules. */
999 if (EVDW_PME(fr->vdwtype))
1001 nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1002 (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1003 for (tpi = 0; tpi < ntp; ++tpi)
1005 for (tpj = 0; tpj < ntp; ++tpj)
1007 C6(nbfp_comb, ntp, tpi, tpj) =
1008 C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1009 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1014 for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1022 /* Count the types so we avoid natoms^2 operations */
1023 snew(typecount, ntp);
1024 gmx_mtop_count_atomtypes(mtop, q, typecount);
1026 for (tpi = 0; tpi < ntp; tpi++)
1028 for (tpj = tpi; tpj < ntp; tpj++)
1030 tmpi = typecount[tpi];
1031 tmpj = typecount[tpj];
1034 npair_ij = tmpi*tmpj;
1038 npair_ij = tmpi*(tmpi - 1)/2;
1042 /* nbfp now includes the 6.0 derivative prefactor */
1043 csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1047 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1048 csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0;
1049 ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0;
1055 /* Subtract the excluded pairs.
1056 * The main reason for substracting exclusions is that in some cases
1057 * some combinations might never occur and the parameters could have
1058 * any value. These unused values should not influence the dispersion
1061 for (mb = 0; mb < mtop->nmolblock; mb++)
1063 nmol = mtop->molblock[mb].nmol;
1064 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1065 excl = &mtop->moltype[mtop->molblock[mb].type].excls;
1066 for (i = 0; (i < atoms->nr); i++)
1070 tpi = atoms->atom[i].type;
1074 tpi = atoms->atom[i].typeB;
1076 j1 = excl->index[i];
1077 j2 = excl->index[i+1];
1078 for (j = j1; j < j2; j++)
1085 tpj = atoms->atom[k].type;
1089 tpj = atoms->atom[k].typeB;
1093 /* nbfp now includes the 6.0 derivative prefactor */
1094 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1098 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1099 csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1100 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1110 /* Only correct for the interaction of the test particle
1111 * with the rest of the system.
1114 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1117 for (mb = 0; mb < mtop->nmolblock; mb++)
1119 nmol = mtop->molblock[mb].nmol;
1120 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1121 for (j = 0; j < atoms->nr; j++)
1124 /* Remove the interaction of the test charge group
1127 if (mb == mtop->nmolblock-1)
1131 if (mb == 0 && nmol == 1)
1133 gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1138 tpj = atoms->atom[j].type;
1142 tpj = atoms->atom[j].typeB;
1144 for (i = 0; i < fr->n_tpi; i++)
1148 tpi = atoms_tpi->atom[i].type;
1152 tpi = atoms_tpi->atom[i].typeB;
1156 /* nbfp now includes the 6.0 derivative prefactor */
1157 csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1161 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1162 csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1163 ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1170 if (npair - nexcl <= 0 && fplog)
1172 fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1178 csix /= npair - nexcl;
1179 ctwelve /= npair - nexcl;
1183 fprintf(debug, "Counted %d exclusions\n", nexcl);
1184 fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1185 fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1187 fr->avcsix[q] = csix;
1188 fr->avctwelve[q] = ctwelve;
1191 if (EVDW_PME(fr->vdwtype))
1198 if (fr->eDispCorr == edispcAllEner ||
1199 fr->eDispCorr == edispcAllEnerPres)
1201 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1202 fr->avcsix[0], fr->avctwelve[0]);
1206 fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1212 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1213 const gmx_mtop_t *mtop)
1215 const t_atoms *at1, *at2;
1216 int mt1, mt2, i, j, tpi, tpj, ntypes;
1222 fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1229 for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1231 at1 = &mtop->moltype[mt1].atoms;
1232 for (i = 0; (i < at1->nr); i++)
1234 tpi = at1->atom[i].type;
1237 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1240 for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1242 at2 = &mtop->moltype[mt2].atoms;
1243 for (j = 0; (j < at2->nr); j++)
1245 tpj = at2->atom[j].type;
1248 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1250 b = BHAMB(nbfp, ntypes, tpi, tpj);
1251 if (b > fr->bham_b_max)
1255 if ((b < bmin) || (bmin == -1))
1265 fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1266 bmin, fr->bham_b_max);
1270 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1271 t_forcerec *fr, real rtab,
1272 const t_commrec *cr,
1273 const char *tabfn, char *eg1, char *eg2,
1283 fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1288 sprintf(buf, "%s", tabfn);
1291 /* Append the two energy group names */
1292 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1293 eg1, eg2, ftp2ext(efXVG));
1295 nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1296 /* Copy the contents of the table to separate coulomb and LJ tables too,
1297 * to improve cache performance.
1299 /* For performance reasons we want
1300 * the table data to be aligned to 16-byte. The pointers could be freed
1301 * but currently aren't.
1303 nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC;
1304 nbl->table_elec.format = nbl->table_elec_vdw.format;
1305 nbl->table_elec.r = nbl->table_elec_vdw.r;
1306 nbl->table_elec.n = nbl->table_elec_vdw.n;
1307 nbl->table_elec.scale = nbl->table_elec_vdw.scale;
1308 nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp;
1309 nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize;
1310 nbl->table_elec.ninteractions = 1;
1311 nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1312 snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1314 nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1315 nbl->table_vdw.format = nbl->table_elec_vdw.format;
1316 nbl->table_vdw.r = nbl->table_elec_vdw.r;
1317 nbl->table_vdw.n = nbl->table_elec_vdw.n;
1318 nbl->table_vdw.scale = nbl->table_elec_vdw.scale;
1319 nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp;
1320 nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize;
1321 nbl->table_vdw.ninteractions = 2;
1322 nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1323 snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1325 for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1327 for (j = 0; j < 4; j++)
1329 nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1331 for (j = 0; j < 8; j++)
1333 nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1338 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1339 int *ncount, int **count)
1341 const gmx_moltype_t *molt;
1343 int mt, ftype, stride, i, j, tabnr;
1345 for (mt = 0; mt < mtop->nmoltype; mt++)
1347 molt = &mtop->moltype[mt];
1348 for (ftype = 0; ftype < F_NRE; ftype++)
1350 if (ftype == ftype1 || ftype == ftype2)
1352 il = &molt->ilist[ftype];
1353 stride = 1 + NRAL(ftype);
1354 for (i = 0; i < il->nr; i += stride)
1356 tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1359 gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1361 if (tabnr >= *ncount)
1363 srenew(*count, tabnr+1);
1364 for (j = *ncount; j < tabnr+1; j++)
1377 static bondedtable_t *make_bonded_tables(FILE *fplog,
1378 int ftype1, int ftype2,
1379 const gmx_mtop_t *mtop,
1380 const char *basefn, const char *tabext)
1382 int i, ncount, *count;
1390 count_tables(ftype1, ftype2, mtop, &ncount, &count);
1395 for (i = 0; i < ncount; i++)
1399 sprintf(tabfn, "%s", basefn);
1400 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1401 tabext, i, ftp2ext(efXVG));
1402 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1411 void forcerec_set_ranges(t_forcerec *fr,
1412 int ncg_home, int ncg_force,
1414 int natoms_force_constr, int natoms_f_novirsum)
1419 /* fr->ncg_force is unused in the standard code,
1420 * but it can be useful for modified code dealing with charge groups.
1422 fr->ncg_force = ncg_force;
1423 fr->natoms_force = natoms_force;
1424 fr->natoms_force_constr = natoms_force_constr;
1426 if (fr->natoms_force_constr > fr->nalloc_force)
1428 fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1432 srenew(fr->f_twin, fr->nalloc_force);
1436 if (fr->bF_NoVirSum)
1438 fr->f_novirsum_n = natoms_f_novirsum;
1439 if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1441 fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1442 srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1447 fr->f_novirsum_n = 0;
1451 static real cutoff_inf(real cutoff)
1455 cutoff = GMX_CUTOFF_INF;
1461 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1462 t_forcerec *fr, const t_inputrec *ir,
1463 const char *tabfn, const gmx_mtop_t *mtop,
1471 gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1475 snew(fr->atf_tabs, ir->adress->n_tf_grps);
1477 sprintf(buf, "%s", tabfn);
1478 for (i = 0; i < ir->adress->n_tf_grps; i++)
1480 j = ir->adress->tf_table_index[i]; /* get energy group index */
1481 sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1482 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1485 fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1487 fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1492 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1499 ir->rcoulomb == 0 &&
1501 ir->ePBC == epbcNONE &&
1502 ir->vdwtype == evdwCUT &&
1503 ir->coulombtype == eelCUT &&
1504 ir->efep == efepNO &&
1505 (ir->implicit_solvent == eisNO ||
1506 (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1507 ir->gb_algorithm == egbHCT ||
1508 ir->gb_algorithm == egbOBC))) &&
1509 getenv("GMX_NO_ALLVSALL") == NULL
1512 if (bAllvsAll && ir->opts.ngener > 1)
1514 const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1520 fprintf(stderr, "\n%s\n", note);
1524 fprintf(fp, "\n%s\n", note);
1530 if (bAllvsAll && fp && MASTER(cr))
1532 fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1539 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1543 /* These thread local data structures are used for bondeds only */
1544 fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1546 if (fr->nthreads > 1)
1548 snew(fr->f_t, fr->nthreads);
1549 /* Thread 0 uses the global force and energy arrays */
1550 for (t = 1; t < fr->nthreads; t++)
1552 fr->f_t[t].f = NULL;
1553 fr->f_t[t].f_nalloc = 0;
1554 snew(fr->f_t[t].fshift, SHIFTS);
1555 fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1556 for (i = 0; i < egNR; i++)
1558 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1565 gmx_bool nbnxn_acceleration_supported(FILE *fplog,
1566 const t_commrec *cr,
1567 const t_inputrec *ir,
1570 if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1572 md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1573 bGPU ? "GPUs" : "SIMD kernels",
1574 bGPU ? "CPU only" : "plain-C kernels");
1582 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1586 *kernel_type = nbnxnk4x4_PlainC;
1587 *ewald_excl = ewaldexclTable;
1589 #ifdef GMX_NBNXN_SIMD
1591 #ifdef GMX_NBNXN_SIMD_4XN
1592 *kernel_type = nbnxnk4xN_SIMD_4xN;
1594 #ifdef GMX_NBNXN_SIMD_2XNN
1595 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1598 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1599 /* We need to choose if we want 2x(N+N) or 4xN kernels.
1600 * Currently this is based on the SIMD acceleration choice,
1601 * but it might be better to decide this at runtime based on CPU.
1603 * 4xN calculates more (zero) interactions, but has less pair-search
1604 * work and much better kernel instruction scheduling.
1606 * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1607 * which doesn't have FMA, both the analytical and tabulated Ewald
1608 * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1609 * 2x(4+4) because it results in significantly fewer pairs.
1610 * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1611 * 10% with HT, 50% without HT. As we currently don't detect the actual
1612 * use of HT, use 4x8 to avoid a potential performance hit.
1613 * On Intel Haswell 4x8 is always faster.
1615 *kernel_type = nbnxnk4xN_SIMD_4xN;
1617 #ifndef GMX_SIMD_HAVE_FMA
1618 if (EEL_PME_EWALD(ir->coulombtype) ||
1619 EVDW_PME(ir->vdwtype))
1621 /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1622 * There are enough instructions to make 2x(4+4) efficient.
1624 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1627 #endif /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1630 if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1632 #ifdef GMX_NBNXN_SIMD_4XN
1633 *kernel_type = nbnxnk4xN_SIMD_4xN;
1635 gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1638 if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1640 #ifdef GMX_NBNXN_SIMD_2XNN
1641 *kernel_type = nbnxnk4xN_SIMD_2xNN;
1643 gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1647 /* Analytical Ewald exclusion correction is only an option in
1649 * Since table lookup's don't parallelize with SIMD, analytical
1650 * will probably always be faster for a SIMD width of 8 or more.
1651 * With FMA analytical is sometimes faster for a width if 4 as well.
1652 * On BlueGene/Q, this is faster regardless of precision.
1653 * In single precision, this is faster on Bulldozer.
1655 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1656 (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1657 defined GMX_SIMD_IBM_QPX
1658 *ewald_excl = ewaldexclAnalytical;
1660 if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1662 *ewald_excl = ewaldexclTable;
1664 if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1666 *ewald_excl = ewaldexclAnalytical;
1670 #endif /* GMX_NBNXN_SIMD */
1674 const char *lookup_nbnxn_kernel_name(int kernel_type)
1676 const char *returnvalue = NULL;
1677 switch (kernel_type)
1680 returnvalue = "not set";
1682 case nbnxnk4x4_PlainC:
1683 returnvalue = "plain C";
1685 case nbnxnk4xN_SIMD_4xN:
1686 case nbnxnk4xN_SIMD_2xNN:
1687 #ifdef GMX_NBNXN_SIMD
1688 #if defined GMX_SIMD_X86_SSE2
1689 returnvalue = "SSE2";
1690 #elif defined GMX_SIMD_X86_SSE4_1
1691 returnvalue = "SSE4.1";
1692 #elif defined GMX_SIMD_X86_AVX_128_FMA
1693 returnvalue = "AVX_128_FMA";
1694 #elif defined GMX_SIMD_X86_AVX_256
1695 returnvalue = "AVX_256";
1696 #elif defined GMX_SIMD_X86_AVX2_256
1697 returnvalue = "AVX2_256";
1699 returnvalue = "SIMD";
1701 #else /* GMX_NBNXN_SIMD */
1702 returnvalue = "not available";
1703 #endif /* GMX_NBNXN_SIMD */
1705 case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break;
1706 case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1710 gmx_fatal(FARGS, "Illegal kernel type selected");
1717 static void pick_nbnxn_kernel(FILE *fp,
1718 const t_commrec *cr,
1719 gmx_bool use_simd_kernels,
1721 gmx_bool bEmulateGPU,
1722 const t_inputrec *ir,
1725 gmx_bool bDoNonbonded)
1727 assert(kernel_type);
1729 *kernel_type = nbnxnkNotSet;
1730 *ewald_excl = ewaldexclTable;
1734 *kernel_type = nbnxnk8x8x8_PlainC;
1738 md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1743 *kernel_type = nbnxnk8x8x8_CUDA;
1746 if (*kernel_type == nbnxnkNotSet)
1748 /* LJ PME with LB combination rule does 7 mesh operations.
1749 * This so slow that we don't compile SIMD non-bonded kernels for that.
1751 if (use_simd_kernels &&
1752 nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1754 pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1758 *kernel_type = nbnxnk4x4_PlainC;
1762 if (bDoNonbonded && fp != NULL)
1764 fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1765 lookup_nbnxn_kernel_name(*kernel_type),
1766 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1767 nbnxn_kernel_to_cj_size(*kernel_type));
1769 if (nbnxnk4x4_PlainC == *kernel_type ||
1770 nbnxnk8x8x8_PlainC == *kernel_type)
1772 md_print_warn(cr, fp,
1773 "WARNING: Using the slow %s kernels. This should\n"
1774 "not happen during routine usage on supported platforms.\n\n",
1775 lookup_nbnxn_kernel_name(*kernel_type));
1780 static void pick_nbnxn_resources(FILE *fp,
1781 const t_commrec *cr,
1782 const gmx_hw_info_t *hwinfo,
1783 gmx_bool bDoNonbonded,
1785 gmx_bool *bEmulateGPU,
1786 const gmx_gpu_opt_t *gpu_opt)
1788 gmx_bool bEmulateGPUEnvVarSet;
1789 char gpu_err_str[STRLEN];
1793 bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1795 /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1796 * GPUs (currently) only handle non-bonded calculations, we will
1797 * automatically switch to emulation if non-bonded calculations are
1798 * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1799 * way to turn off GPU initialization, data movement, and cleanup.
1801 * GPU emulation can be useful to assess the performance one can expect by
1802 * adding GPU(s) to the machine. The conditional below allows this even
1803 * if mdrun is compiled without GPU acceleration support.
1804 * Note that you should freezing the system as otherwise it will explode.
1806 *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1808 gpu_opt->ncuda_dev_use > 0));
1810 /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1812 if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1814 /* Each PP node will use the intra-node id-th device from the
1815 * list of detected/selected GPUs. */
1816 if (!init_gpu(fp, cr->rank_pp_intranode, gpu_err_str,
1817 &hwinfo->gpu_info, gpu_opt))
1819 /* At this point the init should never fail as we made sure that
1820 * we have all the GPUs we need. If it still does, we'll bail. */
1821 gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1823 get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1824 cr->rank_pp_intranode),
1828 /* Here we actually turn on hardware GPU acceleration */
1833 gmx_bool uses_simple_tables(int cutoff_scheme,
1834 nonbonded_verlet_t *nbv,
1837 gmx_bool bUsesSimpleTables = TRUE;
1840 switch (cutoff_scheme)
1843 bUsesSimpleTables = TRUE;
1846 assert(NULL != nbv && NULL != nbv->grp);
1847 grp_index = (group < 0) ? 0 : (nbv->ngrp - 1);
1848 bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1851 gmx_incons("unimplemented");
1853 return bUsesSimpleTables;
1856 static void init_ewald_f_table(interaction_const_t *ic,
1857 gmx_bool bUsesSimpleTables,
1862 if (bUsesSimpleTables)
1864 /* Get the Ewald table spacing based on Coulomb and/or LJ
1865 * Ewald coefficients and rtol.
1867 ic->tabq_scale = ewald_spline3_table_scale(ic);
1869 maxr = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1870 ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2;
1874 ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1875 /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1876 ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1879 sfree_aligned(ic->tabq_coul_FDV0);
1880 sfree_aligned(ic->tabq_coul_F);
1881 sfree_aligned(ic->tabq_coul_V);
1883 sfree_aligned(ic->tabq_vdw_FDV0);
1884 sfree_aligned(ic->tabq_vdw_F);
1885 sfree_aligned(ic->tabq_vdw_V);
1887 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1889 /* Create the original table data in FDV0 */
1890 snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1891 snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1892 snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1893 table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1894 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1897 if (EVDW_PME(ic->vdwtype))
1899 snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1900 snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1901 snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1902 table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1903 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1907 void init_interaction_const_tables(FILE *fp,
1908 interaction_const_t *ic,
1909 gmx_bool bUsesSimpleTables,
1912 if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1914 init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1918 fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1919 1/ic->tabq_scale, ic->tabq_size);
1924 static void clear_force_switch_constants(shift_consts_t *sc)
1931 static void force_switch_constants(real p,
1935 /* Here we determine the coefficient for shifting the force to zero
1936 * between distance rsw and the cut-off rc.
1937 * For a potential of r^-p, we have force p*r^-(p+1).
1938 * But to save flops we absorb p in the coefficient.
1940 * force/p = r^-(p+1) + c2*r^2 + c3*r^3
1941 * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1943 sc->c2 = ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1944 sc->c3 = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1945 sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1948 static void potential_switch_constants(real rsw, real rc,
1949 switch_consts_t *sc)
1951 /* The switch function is 1 at rsw and 0 at rc.
1952 * The derivative and second derivate are zero at both ends.
1953 * rsw = max(r - r_switch, 0)
1954 * sw = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1955 * dsw = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1956 * force = force*dsw - potential*sw
1959 sc->c3 = -10*pow(rc - rsw, -3);
1960 sc->c4 = 15*pow(rc - rsw, -4);
1961 sc->c5 = -6*pow(rc - rsw, -5);
1965 init_interaction_const(FILE *fp,
1966 const t_commrec gmx_unused *cr,
1967 interaction_const_t **interaction_const,
1968 const t_forcerec *fr,
1971 interaction_const_t *ic;
1972 gmx_bool bUsesSimpleTables = TRUE;
1973 const real minusSix = -6.0;
1974 const real minusTwelve = -12.0;
1978 /* Just allocate something so we can free it */
1979 snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1980 snew_aligned(ic->tabq_coul_F, 16, 32);
1981 snew_aligned(ic->tabq_coul_V, 16, 32);
1983 ic->rlist = fr->rlist;
1984 ic->rlistlong = fr->rlistlong;
1987 ic->vdwtype = fr->vdwtype;
1988 ic->vdw_modifier = fr->vdw_modifier;
1989 ic->rvdw = fr->rvdw;
1990 ic->rvdw_switch = fr->rvdw_switch;
1991 ic->ewaldcoeff_lj = fr->ewaldcoeff_lj;
1992 ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1993 ic->sh_lj_ewald = 0;
1994 clear_force_switch_constants(&ic->dispersion_shift);
1995 clear_force_switch_constants(&ic->repulsion_shift);
1997 switch (ic->vdw_modifier)
1999 case eintmodPOTSHIFT:
2000 /* Only shift the potential, don't touch the force */
2001 ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
2002 ic->repulsion_shift.cpot = -pow(ic->rvdw, minusTwelve);
2003 if (EVDW_PME(ic->vdwtype))
2007 crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2008 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
2011 case eintmodFORCESWITCH:
2012 /* Switch the force, switch and shift the potential */
2013 force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2014 &ic->dispersion_shift);
2015 force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2016 &ic->repulsion_shift);
2018 case eintmodPOTSWITCH:
2019 /* Switch the potential and force */
2020 potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2024 case eintmodEXACTCUTOFF:
2025 /* Nothing to do here */
2028 gmx_incons("unimplemented potential modifier");
2031 ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2033 /* Electrostatics */
2034 ic->eeltype = fr->eeltype;
2035 ic->coulomb_modifier = fr->coulomb_modifier;
2036 ic->rcoulomb = fr->rcoulomb;
2037 ic->epsilon_r = fr->epsilon_r;
2038 ic->epsfac = fr->epsfac;
2039 ic->ewaldcoeff_q = fr->ewaldcoeff_q;
2041 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2043 ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2050 /* Reaction-field */
2051 if (EEL_RF(ic->eeltype))
2053 ic->epsilon_rf = fr->epsilon_rf;
2054 ic->k_rf = fr->k_rf;
2055 ic->c_rf = fr->c_rf;
2059 /* For plain cut-off we might use the reaction-field kernels */
2060 ic->epsilon_rf = ic->epsilon_r;
2062 if (fr->coulomb_modifier == eintmodPOTSHIFT)
2064 ic->c_rf = 1/ic->rcoulomb;
2074 real dispersion_shift;
2076 dispersion_shift = ic->dispersion_shift.cpot;
2077 if (EVDW_PME(ic->vdwtype))
2079 dispersion_shift -= ic->sh_lj_ewald;
2081 fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2082 ic->repulsion_shift.cpot, dispersion_shift);
2084 if (ic->eeltype == eelCUT)
2086 fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2088 else if (EEL_PME(ic->eeltype))
2090 fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2095 *interaction_const = ic;
2097 if (fr->nbv != NULL && fr->nbv->bUseGPU)
2099 nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2101 /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2102 * also sharing texture references. To keep the code simple, we don't
2103 * treat texture references as shared resources, but this means that
2104 * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2105 * Hence, to ensure that the non-bonded kernels don't start before all
2106 * texture binding operations are finished, we need to wait for all ranks
2107 * to arrive here before continuing.
2109 * Note that we could omit this barrier if GPUs are not shared (or
2110 * texture objects are used), but as this is initialization code, there
2111 * is not point in complicating things.
2113 #ifdef GMX_THREAD_MPI
2118 #endif /* GMX_THREAD_MPI */
2121 bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2122 init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2125 static void init_nb_verlet(FILE *fp,
2126 nonbonded_verlet_t **nb_verlet,
2127 gmx_bool bFEP_NonBonded,
2128 const t_inputrec *ir,
2129 const t_forcerec *fr,
2130 const t_commrec *cr,
2131 const char *nbpu_opt)
2133 nonbonded_verlet_t *nbv;
2136 gmx_bool bEmulateGPU, bHybridGPURun = FALSE;
2138 nbnxn_alloc_t *nb_alloc;
2139 nbnxn_free_t *nb_free;
2143 pick_nbnxn_resources(fp, cr, fr->hwinfo,
2151 nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2152 for (i = 0; i < nbv->ngrp; i++)
2154 nbv->grp[i].nbl_lists.nnbl = 0;
2155 nbv->grp[i].nbat = NULL;
2156 nbv->grp[i].kernel_type = nbnxnkNotSet;
2158 if (i == 0) /* local */
2160 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2161 nbv->bUseGPU, bEmulateGPU, ir,
2162 &nbv->grp[i].kernel_type,
2163 &nbv->grp[i].ewald_excl,
2166 else /* non-local */
2168 if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2170 /* Use GPU for local, select a CPU kernel for non-local */
2171 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2173 &nbv->grp[i].kernel_type,
2174 &nbv->grp[i].ewald_excl,
2177 bHybridGPURun = TRUE;
2181 /* Use the same kernel for local and non-local interactions */
2182 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2183 nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl;
2190 /* init the NxN GPU data; the last argument tells whether we'll have
2191 * both local and non-local NB calculation on GPU */
2192 nbnxn_cuda_init(fp, &nbv->cu_nbv,
2193 &fr->hwinfo->gpu_info, fr->gpu_opt,
2194 cr->rank_pp_intranode,
2195 (nbv->ngrp > 1) && !bHybridGPURun);
2197 if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2201 nbv->min_ci_balanced = strtol(env, &end, 10);
2202 if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2204 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2209 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2210 nbv->min_ci_balanced);
2215 nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2218 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2219 nbv->min_ci_balanced);
2225 nbv->min_ci_balanced = 0;
2230 nbnxn_init_search(&nbv->nbs,
2231 DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2232 DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2234 gmx_omp_nthreads_get(emntPairsearch));
2236 for (i = 0; i < nbv->ngrp; i++)
2238 if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2240 nb_alloc = &pmalloc;
2249 nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2250 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2251 /* 8x8x8 "non-simple" lists are ATM always combined */
2252 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2256 nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2258 gmx_bool bSimpleList;
2259 int enbnxninitcombrule;
2261 bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2263 if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2265 /* Plain LJ cut-off: we can optimize with combination rules */
2266 enbnxninitcombrule = enbnxninitcombruleDETECT;
2268 else if (fr->vdwtype == evdwPME)
2270 /* LJ-PME: we need to use a combination rule for the grid */
2271 if (fr->ljpme_combination_rule == eljpmeGEOM)
2273 enbnxninitcombrule = enbnxninitcombruleGEOM;
2277 enbnxninitcombrule = enbnxninitcombruleLB;
2282 /* We use a full combination matrix: no rule required */
2283 enbnxninitcombrule = enbnxninitcombruleNONE;
2287 snew(nbv->grp[i].nbat, 1);
2288 nbnxn_atomdata_init(fp,
2290 nbv->grp[i].kernel_type,
2292 fr->ntype, fr->nbfp,
2294 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2299 nbv->grp[i].nbat = nbv->grp[0].nbat;
2304 void init_forcerec(FILE *fp,
2305 const output_env_t oenv,
2308 const t_inputrec *ir,
2309 const gmx_mtop_t *mtop,
2310 const t_commrec *cr,
2316 const char *nbpu_opt,
2317 gmx_bool bNoSolvOpt,
2320 int i, m, negp_pp, negptable, egi, egj;
2325 gmx_bool bGenericKernelOnly;
2326 gmx_bool bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2327 gmx_bool bFEP_NonBonded;
2328 int *nm_ind, egp_flags;
2330 if (fr->hwinfo == NULL)
2332 /* Detect hardware, gather information.
2333 * In mdrun, hwinfo has already been set before calling init_forcerec.
2334 * Here we ignore GPUs, as tools will not use them anyhow.
2336 fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2339 /* By default we turn SIMD kernels on, but it might be turned off further down... */
2340 fr->use_simd_kernels = TRUE;
2342 fr->bDomDec = DOMAINDECOMP(cr);
2344 if (check_box(ir->ePBC, box))
2346 gmx_fatal(FARGS, check_box(ir->ePBC, box));
2349 /* Test particle insertion ? */
2352 /* Set to the size of the molecule to be inserted (the last one) */
2353 /* Because of old style topologies, we have to use the last cg
2354 * instead of the last molecule type.
2356 cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2357 fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2358 if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2360 gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2368 /* Copy AdResS parameters */
2371 fr->adress_type = ir->adress->type;
2372 fr->adress_const_wf = ir->adress->const_wf;
2373 fr->adress_ex_width = ir->adress->ex_width;
2374 fr->adress_hy_width = ir->adress->hy_width;
2375 fr->adress_icor = ir->adress->icor;
2376 fr->adress_site = ir->adress->site;
2377 fr->adress_ex_forcecap = ir->adress->ex_forcecap;
2378 fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2381 snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2382 for (i = 0; i < ir->adress->n_energy_grps; i++)
2384 fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2387 fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2388 snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2389 for (i = 0; i < fr->n_adress_tf_grps; i++)
2391 fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2393 copy_rvec(ir->adress->refs, fr->adress_refs);
2397 fr->adress_type = eAdressOff;
2398 fr->adress_do_hybridpairs = FALSE;
2401 /* Copy the user determined parameters */
2402 fr->userint1 = ir->userint1;
2403 fr->userint2 = ir->userint2;
2404 fr->userint3 = ir->userint3;
2405 fr->userint4 = ir->userint4;
2406 fr->userreal1 = ir->userreal1;
2407 fr->userreal2 = ir->userreal2;
2408 fr->userreal3 = ir->userreal3;
2409 fr->userreal4 = ir->userreal4;
2412 fr->fc_stepsize = ir->fc_stepsize;
2415 fr->efep = ir->efep;
2416 fr->sc_alphavdw = ir->fepvals->sc_alpha;
2417 if (ir->fepvals->bScCoul)
2419 fr->sc_alphacoul = ir->fepvals->sc_alpha;
2420 fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2424 fr->sc_alphacoul = 0;
2425 fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2427 fr->sc_power = ir->fepvals->sc_power;
2428 fr->sc_r_power = ir->fepvals->sc_r_power;
2429 fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2431 env = getenv("GMX_SCSIGMA_MIN");
2435 sscanf(env, "%20lf", &dbl);
2436 fr->sc_sigma6_min = pow(dbl, 6);
2439 fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2443 fr->bNonbonded = TRUE;
2444 if (getenv("GMX_NO_NONBONDED") != NULL)
2446 /* turn off non-bonded calculations */
2447 fr->bNonbonded = FALSE;
2448 md_print_warn(cr, fp,
2449 "Found environment variable GMX_NO_NONBONDED.\n"
2450 "Disabling nonbonded calculations.\n");
2453 bGenericKernelOnly = FALSE;
2455 /* We now check in the NS code whether a particular combination of interactions
2456 * can be used with water optimization, and disable it if that is not the case.
2459 if (getenv("GMX_NB_GENERIC") != NULL)
2464 "Found environment variable GMX_NB_GENERIC.\n"
2465 "Disabling all interaction-specific nonbonded kernels, will only\n"
2466 "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2468 bGenericKernelOnly = TRUE;
2471 if (bGenericKernelOnly == TRUE)
2476 if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2478 fr->use_simd_kernels = FALSE;
2482 "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2483 "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2487 fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2489 /* Check if we can/should do all-vs-all kernels */
2490 fr->bAllvsAll = can_use_allvsall(ir, FALSE, NULL, NULL);
2491 fr->AllvsAll_work = NULL;
2492 fr->AllvsAll_workgb = NULL;
2494 /* All-vs-all kernels have not been implemented in 4.6, and
2495 * the SIMD group kernels are also buggy in this case. Non-SIMD
2496 * group kernels are OK. See Redmine #1249. */
2499 fr->bAllvsAll = FALSE;
2500 fr->use_simd_kernels = FALSE;
2504 "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2505 "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2506 "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2507 "of an unfixed bug. The reference C kernels are correct, though, so\n"
2508 "we are proceeding by disabling all CPU architecture-specific\n"
2509 "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2510 "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2514 /* Neighbour searching stuff */
2515 fr->cutoff_scheme = ir->cutoff_scheme;
2516 fr->bGrid = (ir->ns_type == ensGRID);
2517 fr->ePBC = ir->ePBC;
2519 if (fr->cutoff_scheme == ecutsGROUP)
2521 const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2522 "removed in a future release when 'verlet' supports all interaction forms.\n";
2526 fprintf(stderr, "\n%s\n", note);
2530 fprintf(fp, "\n%s\n", note);
2534 /* Determine if we will do PBC for distances in bonded interactions */
2535 if (fr->ePBC == epbcNONE)
2537 fr->bMolPBC = FALSE;
2541 if (!DOMAINDECOMP(cr))
2543 /* The group cut-off scheme and SHAKE assume charge groups
2544 * are whole, but not using molpbc is faster in most cases.
2546 if (fr->cutoff_scheme == ecutsGROUP ||
2547 (ir->eConstrAlg == econtSHAKE &&
2548 (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2549 gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2551 fr->bMolPBC = ir->bPeriodicMols;
2556 if (getenv("GMX_USE_GRAPH") != NULL)
2558 fr->bMolPBC = FALSE;
2561 fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2568 fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2571 fr->bGB = (ir->implicit_solvent == eisGBSA);
2573 fr->rc_scaling = ir->refcoord_scaling;
2574 copy_rvec(ir->posres_com, fr->posres_com);
2575 copy_rvec(ir->posres_comB, fr->posres_comB);
2576 fr->rlist = cutoff_inf(ir->rlist);
2577 fr->rlistlong = cutoff_inf(ir->rlistlong);
2578 fr->eeltype = ir->coulombtype;
2579 fr->vdwtype = ir->vdwtype;
2580 fr->ljpme_combination_rule = ir->ljpme_combination_rule;
2582 fr->coulomb_modifier = ir->coulomb_modifier;
2583 fr->vdw_modifier = ir->vdw_modifier;
2585 /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2586 switch (fr->eeltype)
2589 fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2595 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2599 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2600 fr->coulomb_modifier = eintmodEXACTCUTOFF;
2609 case eelPMEUSERSWITCH:
2610 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2615 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2619 gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2623 /* Vdw: Translate from mdp settings to kernel format */
2624 switch (fr->vdwtype)
2629 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2633 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2637 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2643 case evdwENCADSHIFT:
2644 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2648 gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2652 /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2653 fr->nbkernel_elec_modifier = fr->coulomb_modifier;
2654 fr->nbkernel_vdw_modifier = fr->vdw_modifier;
2656 fr->rvdw = cutoff_inf(ir->rvdw);
2657 fr->rvdw_switch = ir->rvdw_switch;
2658 fr->rcoulomb = cutoff_inf(ir->rcoulomb);
2659 fr->rcoulomb_switch = ir->rcoulomb_switch;
2661 fr->bTwinRange = fr->rlistlong > fr->rlist;
2662 fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2664 fr->reppow = mtop->ffparams.reppow;
2666 if (ir->cutoff_scheme == ecutsGROUP)
2668 fr->bvdwtab = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2669 && !EVDW_PME(fr->vdwtype));
2670 /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2671 fr->bcoultab = !(fr->eeltype == eelCUT ||
2672 fr->eeltype == eelEWALD ||
2673 fr->eeltype == eelPME ||
2674 fr->eeltype == eelRF ||
2675 fr->eeltype == eelRF_ZERO);
2677 /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2678 * going to be faster to tabulate the interaction than calling the generic kernel.
2679 * However, if generic kernels have been requested we keep things analytically.
2681 if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2682 fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2683 bGenericKernelOnly == FALSE)
2685 if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2687 fr->bcoultab = TRUE;
2688 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2689 * which would otherwise need two tables.
2693 else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2694 ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2695 fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2696 (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2698 if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2700 fr->bcoultab = TRUE;
2704 if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2706 fr->bcoultab = TRUE;
2708 if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2713 if (getenv("GMX_REQUIRE_TABLES"))
2716 fr->bcoultab = TRUE;
2721 fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2722 fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]);
2725 if (fr->bvdwtab == TRUE)
2727 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2728 fr->nbkernel_vdw_modifier = eintmodNONE;
2730 if (fr->bcoultab == TRUE)
2732 fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2733 fr->nbkernel_elec_modifier = eintmodNONE;
2737 if (ir->cutoff_scheme == ecutsVERLET)
2739 if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2741 gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2743 fr->bvdwtab = FALSE;
2744 fr->bcoultab = FALSE;
2747 /* Tables are used for direct ewald sum */
2750 if (EEL_PME(ir->coulombtype))
2754 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2756 if (ir->coulombtype == eelP3M_AD)
2758 please_cite(fp, "Hockney1988");
2759 please_cite(fp, "Ballenegger2012");
2763 please_cite(fp, "Essmann95a");
2766 if (ir->ewald_geometry == eewg3DC)
2770 fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2772 please_cite(fp, "In-Chul99a");
2775 fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2776 init_ewald_tab(&(fr->ewald_table), ir, fp);
2779 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2780 1/fr->ewaldcoeff_q);
2784 if (EVDW_PME(ir->vdwtype))
2788 fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2790 please_cite(fp, "Essmann95a");
2791 fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2794 fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2795 1/fr->ewaldcoeff_lj);
2799 /* Electrostatics */
2800 fr->epsilon_r = ir->epsilon_r;
2801 fr->epsilon_rf = ir->epsilon_rf;
2802 fr->fudgeQQ = mtop->ffparams.fudgeQQ;
2804 /* Parameters for generalized RF */
2808 if (fr->eeltype == eelGRF)
2810 init_generalized_rf(fp, mtop, ir, fr);
2813 fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2814 gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2815 gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2816 IR_ELEC_FIELD(*ir) ||
2817 (fr->adress_icor != eAdressICOff)
2820 if (fr->cutoff_scheme == ecutsGROUP &&
2821 ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2823 /* Count the total number of charge groups */
2824 fr->cg_nalloc = ncg_mtop(mtop);
2825 srenew(fr->cg_cm, fr->cg_nalloc);
2827 if (fr->shift_vec == NULL)
2829 snew(fr->shift_vec, SHIFTS);
2832 if (fr->fshift == NULL)
2834 snew(fr->fshift, SHIFTS);
2837 if (fr->nbfp == NULL)
2839 fr->ntype = mtop->ffparams.atnr;
2840 fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2841 if (EVDW_PME(fr->vdwtype))
2843 fr->ljpme_c6grid = make_ljpme_c6grid(&mtop->ffparams, fr);
2847 /* Copy the energy group exclusions */
2848 fr->egp_flags = ir->opts.egp_flags;
2850 /* Van der Waals stuff */
2851 if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2853 if (fr->rvdw_switch >= fr->rvdw)
2855 gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2856 fr->rvdw_switch, fr->rvdw);
2860 fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2861 (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2862 fr->rvdw_switch, fr->rvdw);
2866 if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2868 gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2871 if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2873 gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2876 if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2878 gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2883 fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n",
2884 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2887 fr->eDispCorr = ir->eDispCorr;
2888 if (ir->eDispCorr != edispcNO)
2890 set_avcsixtwelve(fp, fr, mtop);
2895 set_bham_b_max(fp, fr, mtop);
2898 fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2900 /* Copy the GBSA data (radius, volume and surftens for each
2901 * atomtype) from the topology atomtype section to forcerec.
2903 snew(fr->atype_radius, fr->ntype);
2904 snew(fr->atype_vol, fr->ntype);
2905 snew(fr->atype_surftens, fr->ntype);
2906 snew(fr->atype_gb_radius, fr->ntype);
2907 snew(fr->atype_S_hct, fr->ntype);
2909 if (mtop->atomtypes.nr > 0)
2911 for (i = 0; i < fr->ntype; i++)
2913 fr->atype_radius[i] = mtop->atomtypes.radius[i];
2915 for (i = 0; i < fr->ntype; i++)
2917 fr->atype_vol[i] = mtop->atomtypes.vol[i];
2919 for (i = 0; i < fr->ntype; i++)
2921 fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2923 for (i = 0; i < fr->ntype; i++)
2925 fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2927 for (i = 0; i < fr->ntype; i++)
2929 fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2933 /* Generate the GB table if needed */
2937 fr->gbtabscale = 2000;
2939 fr->gbtabscale = 500;
2943 fr->gbtab = make_gb_table(oenv, fr);
2945 init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2947 /* Copy local gb data (for dd, this is done in dd_partition_system) */
2948 if (!DOMAINDECOMP(cr))
2950 make_local_gb(cr, fr->born, ir->gb_algorithm);
2954 /* Set the charge scaling */
2955 if (fr->epsilon_r != 0)
2957 fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2961 /* eps = 0 is infinite dieletric: no coulomb interactions */
2965 /* Reaction field constants */
2966 if (EEL_RF(fr->eeltype))
2968 calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2969 fr->rcoulomb, fr->temp, fr->zsquare, box,
2970 &fr->kappa, &fr->k_rf, &fr->c_rf);
2973 /*This now calculates sum for q and c6*/
2974 set_chargesum(fp, fr, mtop);
2976 /* if we are using LR electrostatics, and they are tabulated,
2977 * the tables will contain modified coulomb interactions.
2978 * Since we want to use the non-shifted ones for 1-4
2979 * coulombic interactions, we must have an extra set of tables.
2982 /* Construct tables.
2983 * A little unnecessary to make both vdw and coul tables sometimes,
2984 * but what the heck... */
2986 bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2987 (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2989 bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2990 fr->coulomb_modifier != eintmodNONE ||
2991 fr->vdw_modifier != eintmodNONE ||
2992 fr->bBHAM || fr->bEwald) &&
2993 (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2994 gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2995 gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2997 negp_pp = ir->opts.ngener - ir->nwall;
3001 bSomeNormalNbListsAreInUse = TRUE;
3006 bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3007 for (egi = 0; egi < negp_pp; egi++)
3009 for (egj = egi; egj < negp_pp; egj++)
3011 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3012 if (!(egp_flags & EGP_EXCL))
3014 if (egp_flags & EGP_TABLE)
3020 bSomeNormalNbListsAreInUse = TRUE;
3025 if (bSomeNormalNbListsAreInUse)
3027 fr->nnblists = negptable + 1;
3031 fr->nnblists = negptable;
3033 if (fr->nnblists > 1)
3035 snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3044 snew(fr->nblists, fr->nnblists);
3046 /* This code automatically gives table length tabext without cut-off's,
3047 * in that case grompp should already have checked that we do not need
3048 * normal tables and we only generate tables for 1-4 interactions.
3050 rtab = ir->rlistlong + ir->tabext;
3054 /* make tables for ordinary interactions */
3055 if (bSomeNormalNbListsAreInUse)
3057 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3060 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3062 if (!bMakeSeparate14Table)
3064 fr->tab14 = fr->nblists[0].table_elec_vdw;
3074 /* Read the special tables for certain energy group pairs */
3075 nm_ind = mtop->groups.grps[egcENER].nm_ind;
3076 for (egi = 0; egi < negp_pp; egi++)
3078 for (egj = egi; egj < negp_pp; egj++)
3080 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3081 if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3083 if (fr->nnblists > 1)
3085 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3087 /* Read the table file with the two energy groups names appended */
3088 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3089 *mtop->groups.grpname[nm_ind[egi]],
3090 *mtop->groups.grpname[nm_ind[egj]],
3094 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3095 *mtop->groups.grpname[nm_ind[egi]],
3096 *mtop->groups.grpname[nm_ind[egj]],
3097 &fr->nblists[fr->nnblists/2+m]);
3101 else if (fr->nnblists > 1)
3103 fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3109 else if ((fr->eDispCorr != edispcNO) &&
3110 ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3111 (fr->vdw_modifier == eintmodFORCESWITCH) ||
3112 (fr->vdw_modifier == eintmodPOTSHIFT)))
3114 /* Tables might not be used for the potential modifier interactions per se, but
3115 * we still need them to evaluate switch/shift dispersion corrections in this case.
3117 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3120 if (bMakeSeparate14Table)
3122 /* generate extra tables with plain Coulomb for 1-4 interactions only */
3123 fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3124 GMX_MAKETABLES_14ONLY);
3127 /* Read AdResS Thermo Force table if needed */
3128 if (fr->adress_icor == eAdressICThermoForce)
3130 /* old todo replace */
3132 if (ir->adress->n_tf_grps > 0)
3134 make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3139 /* load the default table */
3140 snew(fr->atf_tabs, 1);
3141 fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3146 fr->nwall = ir->nwall;
3147 if (ir->nwall && ir->wall_type == ewtTABLE)
3149 make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3154 fcd->bondtab = make_bonded_tables(fp,
3155 F_TABBONDS, F_TABBONDSNC,
3157 fcd->angletab = make_bonded_tables(fp,
3160 fcd->dihtab = make_bonded_tables(fp,
3168 fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3172 /* QM/MM initialization if requested
3176 fprintf(stderr, "QM/MM calculation requested.\n");
3179 fr->bQMMM = ir->bQMMM;
3180 fr->qr = mk_QMMMrec();
3182 /* Set all the static charge group info */
3183 fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3185 &fr->bExcl_IntraCGAll_InterCGNone);
3186 if (DOMAINDECOMP(cr))
3192 fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3195 if (!DOMAINDECOMP(cr))
3197 forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3198 mtop->natoms, mtop->natoms, mtop->natoms);
3201 fr->print_force = print_force;
3204 /* coarse load balancing vars */
3209 /* Initialize neighbor search */
3210 init_ns(fp, cr, &fr->ns, fr, mtop);
3212 if (cr->duty & DUTY_PP)
3214 gmx_nonbonded_setup(fr, bGenericKernelOnly);
3218 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3223 /* Initialize the thread working data for bonded interactions */
3224 init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3226 snew(fr->excl_load, fr->nthreads+1);
3228 if (fr->cutoff_scheme == ecutsVERLET)
3230 if (ir->rcoulomb != ir->rvdw)
3232 gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3235 init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3238 /* fr->ic is used both by verlet and group kernels (to some extent) now */
3239 init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3241 if (ir->eDispCorr != edispcNO)
3243 calc_enervirdiff(fp, ir->eDispCorr, fr);
3247 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3248 #define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i)
3249 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3251 void pr_forcerec(FILE *fp, t_forcerec *fr)
3255 pr_real(fp, fr->rlist);
3256 pr_real(fp, fr->rcoulomb);
3257 pr_real(fp, fr->fudgeQQ);
3258 pr_bool(fp, fr->bGrid);
3259 pr_bool(fp, fr->bTwinRange);
3260 /*pr_int(fp,fr->cg0);
3261 pr_int(fp,fr->hcg);*/
3262 for (i = 0; i < fr->nnblists; i++)
3264 pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3266 pr_real(fp, fr->rcoulomb_switch);
3267 pr_real(fp, fr->rcoulomb);
3272 void forcerec_set_excl_load(t_forcerec *fr,
3273 const gmx_localtop_t *top)
3276 int t, i, j, ntot, n, ntarget;
3278 ind = top->excls.index;
3282 for (i = 0; i < top->excls.nr; i++)
3284 for (j = ind[i]; j < ind[i+1]; j++)
3293 fr->excl_load[0] = 0;
3296 for (t = 1; t <= fr->nthreads; t++)
3298 ntarget = (ntot*t)/fr->nthreads;
3299 while (i < top->excls.nr && n < ntarget)
3301 for (j = ind[i]; j < ind[i+1]; j++)
3310 fr->excl_load[t] = i;
3314 /* Frees GPU memory and destroys the CUDA context.
3316 * Note that this function needs to be called even if GPUs are not used
3317 * in this run because the PME ranks have no knowledge of whether GPUs
3318 * are used or not, but all ranks need to enter the barrier below.
3320 void free_gpu_resources(const t_forcerec *fr,
3321 const t_commrec *cr,
3322 const gmx_gpu_info_t *gpu_info,
3323 const gmx_gpu_opt_t *gpu_opt)
3325 gmx_bool bIsPPrankUsingGPU;
3326 char gpu_err_str[STRLEN];
3328 bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
3330 if (bIsPPrankUsingGPU)
3332 /* free nbnxn data in GPU memory */
3333 nbnxn_cuda_free(fr->nbv->cu_nbv);
3335 /* With tMPI we need to wait for all ranks to finish deallocation before
3336 * destroying the context in free_gpu() as some ranks may be sharing
3338 * Note: as only PP ranks need to free GPU resources, so it is safe to
3339 * not call the barrier on PME ranks.
3341 #ifdef GMX_THREAD_MPI
3346 #endif /* GMX_THREAD_MPI */
3348 /* uninitialize GPU (by destroying the context) */
3349 if (!free_gpu(cr->rank_pp_intranode, gpu_err_str, gpu_info, gpu_opt))
3351 gmx_warning("On rank %d failed to free GPU #%d: %s",
3352 cr->nodeid, get_current_gpu_device_id(), gpu_err_str);